From f9f95e168a5d18b341b77efa66f3cebdb830bc17 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 5 Apr 2006 10:30:18 +0200 Subject: [PATCH 01/95] post-commit: Removed commit messages to public lists (for now) BitKeeper/triggers/post-commit: Removed commit messages to public lists (for now) --- BitKeeper/triggers/post-commit | 45 ---------------------------------- 1 file changed, 45 deletions(-) diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit index ce5e03b0ac9..782a4c4dca8 100755 --- a/BitKeeper/triggers/post-commit +++ b/BitKeeper/triggers/post-commit @@ -66,51 +66,6 @@ EOF bk cset -r+ -d ) | $SENDMAIL -t -#++ -# commits@ mail -#-- - echo "Notifying commits list at $COMMITS" - ( - cat < -From: $FROM -To: $COMMITS -Subject: bk commit into $VERSION tree ($CHANGESET)$BS -X-CSetKey: <$CSETKEY> -$BH -Below is the list of changes that have just been committed into a local -$VERSION repository of $USER. When $USER does a push these changes will -be propagated to the main repository and, within 24 hours after the -push, to the public repository. -For information on how to access the public repository -see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) | head -n $LIMIT | $SENDMAIL -t - -#++ -# docs-commit@ mail -# Picks up anything under the Docs subdirectory (relevant for docs team). -#-- - bk changes -v -r+ | grep -q " Docs/" - if [ $? -eq 0 ] - then - echo "Notifying docs list at $DOCS" - ( - cat < -From: $FROM -To: $DOCS -Subject: bk commit - $VERSION tree (Manual) ($CHANGESET)$BS - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) | $SENDMAIL -t - fi - else echo "commit failed because '$BK_STATUS', you may need to re-clone..." fi From d2b8d744d9f9ab32478416885d7929ad00d3b276 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 11 Apr 2006 16:45:10 +0300 Subject: [PATCH 02/95] Added storage/maria (based on MyISAM). WL#3245 Moved things into ft_global.h, my_handler.h and myisamchk.h to allow MyISAM and Maria to share code and defines Rename of not properly renamed functions in MyISAM and my_handler.c Renamed some MI_ defines to HA_ to allow MyISAM and Maria to share Added maria variables to mysqld.cc and set_var.cc Fixed compiler warnings BitKeeper/etc/ignore: added storage/maria/*.MAI BUILD/SETUP.sh: Compile maria by default BitKeeper/triggers/post-commit: No public maria emails. Mark changesets emails with 'maria' configure.in: Add maria include/ft_global.h: Move defines needed by maria and MyISAM here include/keycache.h: Add support for default key_cache if cache not found include/my_base.h: Add invalidator_by_filename include/my_handler.h: Remove duplicate header files Add defines that are typical for handlers (MyISAM and Maria) include/myisam.h: Move things to my_handler.h to allow Maria and MyISAM to share things (Some things needed to be shared to allow sharing of HA_CHECK structure) libmysqld/Makefile.am: Added ha_maria.cc mysys/mf_keycaches.c: Added default value for multi_key_cache_search mysys/my_handler.c: mi_compare_text -> ha_compare_text Removed compiler warnings sql/ha_myisam.cc: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/ha_myisam.h: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/ha_myisammrg.h: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/handler.h: Added MARIA Added inclusion of my_handler.h sql/item_func.h: Remove duplicate include sql/mysql_priv.h: Added maria variables sql/mysqld.cc: Added maria sql/set_var.cc: Added maria status variables sql/set_var.h: Added maria sql/sql_class.h: Added maria status variables sql/sql_sort.h: Remove duplicate BUFFPEK struct storage/Makefile.am: Added maria storage/csv/ha_tina.cc: Removed compiler warning storage/myisam/Makefile.am: Added ft_myisam.c storage/myisam/ft_boolean_search.c: mi_compare_text -> ha_compare_text MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF Remove compiler warnings storage/myisam/ft_nlq_search.c: mi_compare_text -> ha_compare_text storage/myisam/ft_parser.c: mi_compare_text -> ha_compare_text storage/myisam/ft_static.c: Move ft_init_search() to ft_myisam.c to make ft_static.c independent of MyISAM storage/myisam/ft_stopwords.c: mi_compare_text -> ha_compare_text storage/myisam/ft_update.c: mi_compare_text -> ha_compare_text storage/myisam/fulltext.h: Move things to ft_global.h to allow to share more things between MyISAM and Maria storage/myisam/mi_check.c: MI_CHECK -> HA_CHECK storage/myisam/mi_create.c: MI_MAX_POSSIBLE_KEY -> HA_MAX_POSSIBLE_KEY MI_MAX_KEY_BLOCK_SIZE -> HA_MAX_KEY_BLOCK_SIZE MI_MAX_KEY_SEG -> HA_MAX_KEY_SEG MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_delete.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_delete_all.c: Remove not used variable storage/myisam/mi_dynrec.c: _my_calc_total_blob_length -> _mi_calc_total_blob_length storage/myisam/mi_key.c: _my_store_blob_length -> _mi_store_blob_length storage/myisam/mi_log.c: _my_calc_total_blob_length -> _mi_calc_total_blob_length storage/myisam/mi_open.c: MI_MAX_POSSIBLE_KEY -> HA_MAX_POSSIBLE_KEY MI_MAX_KEY_SEG -> HA_MAX_KEY_SEG MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF my_n_base_info_read -> mi_n_base_info_read storage/myisam/mi_packrec.c: Made read_pack_length static _my_store_blob_length -> _mi_store_blob_length Remove not used variable storage/myisam/mi_range.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_search.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_test1.c: MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH storage/myisam/mi_test2.c: Fixed compiler warning storage/myisam/mi_unique.c: Fixed compiler warning mi_compare_text -> ha_compare_text storage/myisam/mi_update.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_write.c: Rename of defines and functions storage/myisam/myisamchk.c: Rename of defines and functions storage/myisam/myisamdef.h: Remove tabs Indentation fixes (Large changes as I did run indent-ex on the file) Move some things to myisamchk.h Added missing functions that gave compiler warnings storage/myisam/myisamlog.c: Rename of defines and functions storage/myisam/myisampack.c: Remove compiler warning storage/myisam/rt_index.c: Rename of defines and functions storage/myisam/sort.c: Rename of defines, functions and structures config/ac-macros/ha_maria.m4: New BitKeeper file ``config/ac-macros/ha_maria.m4'' include/maria.h: New BitKeeper file ``include/maria.h'' include/myisamchk.h: New BitKeeper file ``include/myisamchk.h'' libmysqld/ha_maria.cc: New BitKeeper file ``libmysqld/ha_maria.cc'' mysql-test/include/have_maria.inc: New BitKeeper file ``mysql-test/include/have_maria.inc'' mysql-test/r/have_maria.require: New BitKeeper file ``mysql-test/r/have_maria.require'' mysql-test/r/maria.result: New BitKeeper file ``mysql-test/r/maria.result'' mysql-test/r/ps_maria.result: New BitKeeper file ``mysql-test/r/ps_maria.result'' mysql-test/t/maria.test: New BitKeeper file ``mysql-test/t/maria.test'' mysql-test/t/ps_maria.test: New BitKeeper file ``mysql-test/t/ps_maria.test'' sql/ha_maria.cc: New BitKeeper file ``sql/ha_maria.cc'' sql/ha_maria.h: New BitKeeper file ``sql/ha_maria.h'' storage/maria/Makefile.am: New BitKeeper file ``storage/maria/Makefile.am'' storage/maria/cmakelists.txt: New BitKeeper file ``storage/maria/cmakelists.txt'' storage/maria/ft_maria.c: New BitKeeper file ``storage/maria/ft_maria.c'' storage/maria/ma_cache.c: New BitKeeper file ``storage/maria/ma_cache.c'' storage/maria/ma_changed.c: New BitKeeper file ``storage/maria/ma_changed.c'' storage/maria/ma_check.c: New BitKeeper file ``storage/maria/ma_check.c'' storage/maria/ma_checksum.c: New BitKeeper file ``storage/maria/ma_checksum.c'' storage/maria/ma_close.c: New BitKeeper file ``storage/maria/ma_close.c'' storage/maria/ma_create.c: New BitKeeper file ``storage/maria/ma_create.c'' storage/maria/ma_dbug.c: New BitKeeper file ``storage/maria/ma_dbug.c'' storage/maria/ma_delete.c: New BitKeeper file ``storage/maria/ma_delete.c'' storage/maria/ma_delete_all.c: New BitKeeper file ``storage/maria/ma_delete_all.c'' storage/maria/ma_delete_table.c: New BitKeeper file ``storage/maria/ma_delete_table.c'' storage/maria/ma_dynrec.c: New BitKeeper file ``storage/maria/ma_dynrec.c'' storage/maria/ma_extra.c: New BitKeeper file ``storage/maria/ma_extra.c'' storage/maria/ma_ft_boolean_search.c: New BitKeeper file ``storage/maria/ma_ft_boolean_search.c'' storage/maria/ma_ft_eval.c: New BitKeeper file ``storage/maria/ma_ft_eval.c'' storage/maria/ma_ft_eval.h: New BitKeeper file ``storage/maria/ma_ft_eval.h'' storage/maria/ma_ft_nlq_search.c: New BitKeeper file ``storage/maria/ma_ft_nlq_search.c'' storage/maria/ma_ft_parser.c: New BitKeeper file ``storage/maria/ma_ft_parser.c'' storage/maria/ma_ft_stem.c: New BitKeeper file ``storage/maria/ma_ft_stem.c'' storage/maria/ma_ft_test1.c: New BitKeeper file ``storage/maria/ma_ft_test1.c'' storage/maria/ma_ft_test1.h: New BitKeeper file ``storage/maria/ma_ft_test1.h'' storage/maria/ma_ft_update.c: New BitKeeper file ``storage/maria/ma_ft_update.c'' storage/maria/ma_ftdefs.h: New BitKeeper file ``storage/maria/ma_ftdefs.h'' storage/maria/ma_fulltext.h: New BitKeeper file ``storage/maria/ma_fulltext.h'' storage/maria/ma_info.c: New BitKeeper file ``storage/maria/ma_info.c'' storage/maria/ma_init.c: New BitKeeper file ``storage/maria/ma_init.c'' storage/maria/ma_key.c: New BitKeeper file ``storage/maria/ma_key.c'' storage/maria/ma_keycache.c: New BitKeeper file ``storage/maria/ma_keycache.c'' storage/maria/ma_locking.c: New BitKeeper file ``storage/maria/ma_locking.c'' storage/maria/ma_log.c: New BitKeeper file ``storage/maria/ma_log.c'' storage/maria/ma_open.c: New BitKeeper file ``storage/maria/ma_open.c'' storage/maria/ma_packrec.c: New BitKeeper file ``storage/maria/ma_packrec.c'' storage/maria/ma_page.c: New BitKeeper file ``storage/maria/ma_page.c'' storage/maria/ma_panic.c: New BitKeeper file ``storage/maria/ma_panic.c'' storage/maria/ma_preload.c: New BitKeeper file ``storage/maria/ma_preload.c'' storage/maria/ma_range.c: New BitKeeper file ``storage/maria/ma_range.c'' storage/maria/ma_rename.c: New BitKeeper file ``storage/maria/ma_rename.c'' storage/maria/ma_rfirst.c: New BitKeeper file ``storage/maria/ma_rfirst.c'' storage/maria/ma_rkey.c: New BitKeeper file ``storage/maria/ma_rkey.c'' storage/maria/ma_rlast.c: New BitKeeper file ``storage/maria/ma_rlast.c'' storage/maria/ma_rnext.c: New BitKeeper file ``storage/maria/ma_rnext.c'' storage/maria/ma_rnext_same.c: New BitKeeper file ``storage/maria/ma_rnext_same.c'' storage/maria/ma_rprev.c: New BitKeeper file ``storage/maria/ma_rprev.c'' storage/maria/ma_rrnd.c: New BitKeeper file ``storage/maria/ma_rrnd.c'' storage/maria/ma_rsame.c: New BitKeeper file ``storage/maria/ma_rsame.c'' storage/maria/ma_rsamepos.c: New BitKeeper file ``storage/maria/ma_rsamepos.c'' storage/maria/ma_rt_index.c: New BitKeeper file ``storage/maria/ma_rt_index.c'' storage/maria/ma_rt_index.h: New BitKeeper file ``storage/maria/ma_rt_index.h'' storage/maria/ma_rt_key.c: New BitKeeper file ``storage/maria/ma_rt_key.c'' storage/maria/ma_rt_key.h: New BitKeeper file ``storage/maria/ma_rt_key.h'' storage/maria/ma_rt_mbr.c: New BitKeeper file ``storage/maria/ma_rt_mbr.c'' storage/maria/ma_rt_mbr.h: New BitKeeper file ``storage/maria/ma_rt_mbr.h'' storage/maria/ma_rt_split.c: New BitKeeper file ``storage/maria/ma_rt_split.c'' storage/maria/ma_rt_test.c: New BitKeeper file ``storage/maria/ma_rt_test.c'' storage/maria/ma_scan.c: New BitKeeper file ``storage/maria/ma_scan.c'' storage/maria/ma_search.c: New BitKeeper file ``storage/maria/ma_search.c'' storage/maria/ma_sort.c: New BitKeeper file ``storage/maria/ma_sort.c'' storage/maria/ma_sp_defs.h: New BitKeeper file ``storage/maria/ma_sp_defs.h'' storage/maria/ma_sp_key.c: New BitKeeper file ``storage/maria/ma_sp_key.c'' storage/maria/ma_sp_test.c: New BitKeeper file ``storage/maria/ma_sp_test.c'' storage/maria/ma_static.c: New BitKeeper file ``storage/maria/ma_static.c'' storage/maria/ma_statrec.c: New BitKeeper file ``storage/maria/ma_statrec.c'' storage/maria/ma_test1.c: New BitKeeper file ``storage/maria/ma_test1.c'' storage/maria/ma_test2.c: New BitKeeper file ``storage/maria/ma_test2.c'' storage/maria/ma_test3.c: New BitKeeper file ``storage/maria/ma_test3.c'' storage/maria/ma_test_all.sh: New BitKeeper file ``storage/maria/ma_test_all.sh'' storage/maria/ma_unique.c: New BitKeeper file ``storage/maria/ma_unique.c'' storage/maria/ma_update.c: New BitKeeper file ``storage/maria/ma_update.c'' storage/maria/ma_write.c: New BitKeeper file ``storage/maria/ma_write.c'' storage/maria/maria_chk.c: New BitKeeper file ``storage/maria/maria_chk.c'' storage/maria/maria_def.h: New BitKeeper file ``storage/maria/maria_def.h'' storage/maria/maria_ftdump.c: New BitKeeper file ``storage/maria/maria_ftdump.c'' storage/maria/maria_log.c: New BitKeeper file ``storage/maria/maria_log.c'' storage/maria/maria_pack.c: New BitKeeper file ``storage/maria/maria_pack.c'' storage/maria/maria_rename.sh: New BitKeeper file ``storage/maria/maria_rename.sh'' storage/maria/test_pack: New BitKeeper file ``storage/maria/test_pack'' storage/myisam/ft_myisam.c: New BitKeeper file ``storage/myisam/ft_myisam.c'' --- .bzrignore | 13 + BUILD/SETUP.sh | 2 +- BitKeeper/triggers/post-commit | 47 +- config/ac-macros/ha_maria.m4 | 29 + configure.in | 4 + include/ft_global.h | 11 + include/keycache.h | 3 +- include/maria.h | 418 +++ include/my_base.h | 4 +- include/my_handler.h | 27 +- include/myisam.h | 260 +- include/myisamchk.h | 160 + libmysqld/Makefile.am | 4 +- mysql-test/include/have_maria.inc | 4 + mysql-test/r/have_maria.require | 2 + mysql-test/r/maria.result | 1417 +++++++++ mysql-test/r/ps_maria.result | 3137 +++++++++++++++++++ mysql-test/t/maria.test | 802 +++++ mysql-test/t/ps_maria.test | 46 + mysys/mf_keycaches.c | 14 +- mysys/my_handler.c | 30 +- sql/ha_maria.cc | 1836 +++++++++++ sql/ha_maria.h | 141 + sql/ha_myisam.cc | 38 +- sql/ha_myisam.h | 7 +- sql/ha_myisammrg.h | 4 +- sql/handler.h | 2 + sql/item_func.h | 1 - sql/mysql_priv.h | 6 + sql/mysqld.cc | 81 +- sql/set_var.cc | 36 +- sql/set_var.h | 1 + sql/sql_class.h | 5 +- sql/sql_sort.h | 11 +- storage/Makefile.am | 2 +- storage/csv/ha_tina.cc | 3 +- storage/maria/Makefile.am | 106 + storage/maria/cmakelists.txt | 26 + storage/maria/ft_maria.c | 49 + storage/maria/ma_cache.c | 108 + storage/maria/ma_changed.c | 34 + storage/maria/ma_check.c | 4301 ++++++++++++++++++++++++++ storage/maria/ma_checksum.c | 65 + storage/maria/ma_close.c | 124 + storage/maria/ma_create.c | 816 +++++ storage/maria/ma_dbug.c | 193 ++ storage/maria/ma_delete.c | 890 ++++++ storage/maria/ma_delete_all.c | 79 + storage/maria/ma_delete_table.c | 58 + storage/maria/ma_dynrec.c | 1811 +++++++++++ storage/maria/ma_extra.c | 426 +++ storage/maria/ma_ft_boolean_search.c | 955 ++++++ storage/maria/ma_ft_eval.c | 254 ++ storage/maria/ma_ft_eval.h | 42 + storage/maria/ma_ft_nlq_search.c | 366 +++ storage/maria/ma_ft_parser.c | 394 +++ storage/maria/ma_ft_stem.c | 19 + storage/maria/ma_ft_test1.c | 317 ++ storage/maria/ma_ft_test1.h | 421 +++ storage/maria/ma_ft_update.c | 359 +++ storage/maria/ma_ftdefs.h | 149 + storage/maria/ma_fulltext.h | 28 + storage/maria/ma_info.c | 133 + storage/maria/ma_init.c | 59 + storage/maria/ma_key.c | 592 ++++ storage/maria/ma_keycache.c | 163 + storage/maria/ma_locking.c | 554 ++++ storage/maria/ma_log.c | 164 + storage/maria/ma_open.c | 1288 ++++++++ storage/maria/ma_packrec.c | 1346 ++++++++ storage/maria/ma_page.c | 160 + storage/maria/ma_panic.c | 124 + storage/maria/ma_preload.c | 117 + storage/maria/ma_range.c | 244 ++ storage/maria/ma_rename.c | 61 + storage/maria/ma_rfirst.c | 27 + storage/maria/ma_rkey.c | 144 + storage/maria/ma_rlast.c | 27 + storage/maria/ma_rnext.c | 122 + storage/maria/ma_rnext_same.c | 105 + storage/maria/ma_rprev.c | 88 + storage/maria/ma_rrnd.c | 60 + storage/maria/ma_rsame.c | 66 + storage/maria/ma_rsamepos.c | 56 + storage/maria/ma_rt_index.c | 1081 +++++++ storage/maria/ma_rt_index.h | 47 + storage/maria/ma_rt_key.c | 100 + storage/maria/ma_rt_key.h | 33 + storage/maria/ma_rt_mbr.c | 801 +++++ storage/maria/ma_rt_mbr.h | 38 + storage/maria/ma_rt_split.c | 350 +++ storage/maria/ma_rt_test.c | 473 +++ storage/maria/ma_scan.c | 46 + storage/maria/ma_search.c | 1894 ++++++++++++ storage/maria/ma_sort.c | 1021 ++++++ storage/maria/ma_sp_defs.h | 48 + storage/maria/ma_sp_key.c | 300 ++ storage/maria/ma_sp_test.c | 568 ++++ storage/maria/ma_static.c | 65 + storage/maria/ma_statrec.c | 301 ++ storage/maria/ma_test1.c | 681 ++++ storage/maria/ma_test2.c | 1050 +++++++ storage/maria/ma_test3.c | 502 +++ storage/maria/ma_test_all.sh | 147 + storage/maria/ma_unique.c | 234 ++ storage/maria/ma_update.c | 232 ++ storage/maria/ma_write.c | 1033 +++++++ storage/maria/maria_chk.c | 1824 +++++++++++ storage/maria/maria_def.h | 751 +++++ storage/maria/maria_ftdump.c | 279 ++ storage/maria/maria_log.c | 848 +++++ storage/maria/maria_pack.c | 3202 +++++++++++++++++++ storage/maria/maria_rename.sh | 17 + storage/maria/test_pack | 10 + storage/myisam/Makefile.am | 3 +- storage/myisam/ft_boolean_search.c | 16 +- storage/myisam/ft_myisam.c | 36 + storage/myisam/ft_nlq_search.c | 2 +- storage/myisam/ft_parser.c | 2 +- storage/myisam/ft_static.c | 13 - storage/myisam/ft_stopwords.c | 2 +- storage/myisam/ft_update.c | 4 +- storage/myisam/fulltext.h | 10 - storage/myisam/mi_check.c | 112 +- storage/myisam/mi_create.c | 8 +- storage/myisam/mi_delete.c | 12 +- storage/myisam/mi_delete_all.c | 1 - storage/myisam/mi_dynrec.c | 10 +- storage/myisam/mi_key.c | 2 +- storage/myisam/mi_log.c | 2 +- storage/myisam/mi_open.c | 15 +- storage/myisam/mi_packrec.c | 6 +- storage/myisam/mi_range.c | 2 +- storage/myisam/mi_search.c | 10 +- storage/myisam/mi_test1.c | 2 +- storage/myisam/mi_test2.c | 2 +- storage/myisam/mi_unique.c | 4 +- storage/myisam/mi_update.c | 2 +- storage/myisam/mi_write.c | 12 +- storage/myisam/myisamchk.c | 33 +- storage/myisam/myisamdef.h | 857 +++-- storage/myisam/myisamlog.c | 2 +- storage/myisam/myisampack.c | 1 + storage/myisam/rt_index.c | 4 +- storage/myisam/sort.c | 7 +- 145 files changed, 44957 insertions(+), 911 deletions(-) create mode 100644 config/ac-macros/ha_maria.m4 create mode 100644 include/maria.h create mode 100644 include/myisamchk.h create mode 100644 mysql-test/include/have_maria.inc create mode 100644 mysql-test/r/have_maria.require create mode 100644 mysql-test/r/maria.result create mode 100644 mysql-test/r/ps_maria.result create mode 100644 mysql-test/t/maria.test create mode 100644 mysql-test/t/ps_maria.test create mode 100644 sql/ha_maria.cc create mode 100644 sql/ha_maria.h create mode 100644 storage/maria/Makefile.am create mode 100644 storage/maria/cmakelists.txt create mode 100644 storage/maria/ft_maria.c create mode 100644 storage/maria/ma_cache.c create mode 100644 storage/maria/ma_changed.c create mode 100644 storage/maria/ma_check.c create mode 100644 storage/maria/ma_checksum.c create mode 100644 storage/maria/ma_close.c create mode 100644 storage/maria/ma_create.c create mode 100644 storage/maria/ma_dbug.c create mode 100644 storage/maria/ma_delete.c create mode 100644 storage/maria/ma_delete_all.c create mode 100644 storage/maria/ma_delete_table.c create mode 100644 storage/maria/ma_dynrec.c create mode 100644 storage/maria/ma_extra.c create mode 100644 storage/maria/ma_ft_boolean_search.c create mode 100644 storage/maria/ma_ft_eval.c create mode 100644 storage/maria/ma_ft_eval.h create mode 100644 storage/maria/ma_ft_nlq_search.c create mode 100644 storage/maria/ma_ft_parser.c create mode 100644 storage/maria/ma_ft_stem.c create mode 100644 storage/maria/ma_ft_test1.c create mode 100644 storage/maria/ma_ft_test1.h create mode 100644 storage/maria/ma_ft_update.c create mode 100644 storage/maria/ma_ftdefs.h create mode 100644 storage/maria/ma_fulltext.h create mode 100644 storage/maria/ma_info.c create mode 100644 storage/maria/ma_init.c create mode 100644 storage/maria/ma_key.c create mode 100644 storage/maria/ma_keycache.c create mode 100644 storage/maria/ma_locking.c create mode 100644 storage/maria/ma_log.c create mode 100644 storage/maria/ma_open.c create mode 100644 storage/maria/ma_packrec.c create mode 100644 storage/maria/ma_page.c create mode 100644 storage/maria/ma_panic.c create mode 100644 storage/maria/ma_preload.c create mode 100644 storage/maria/ma_range.c create mode 100644 storage/maria/ma_rename.c create mode 100644 storage/maria/ma_rfirst.c create mode 100644 storage/maria/ma_rkey.c create mode 100644 storage/maria/ma_rlast.c create mode 100644 storage/maria/ma_rnext.c create mode 100644 storage/maria/ma_rnext_same.c create mode 100644 storage/maria/ma_rprev.c create mode 100644 storage/maria/ma_rrnd.c create mode 100644 storage/maria/ma_rsame.c create mode 100644 storage/maria/ma_rsamepos.c create mode 100644 storage/maria/ma_rt_index.c create mode 100644 storage/maria/ma_rt_index.h create mode 100644 storage/maria/ma_rt_key.c create mode 100644 storage/maria/ma_rt_key.h create mode 100644 storage/maria/ma_rt_mbr.c create mode 100644 storage/maria/ma_rt_mbr.h create mode 100644 storage/maria/ma_rt_split.c create mode 100644 storage/maria/ma_rt_test.c create mode 100644 storage/maria/ma_scan.c create mode 100644 storage/maria/ma_search.c create mode 100644 storage/maria/ma_sort.c create mode 100644 storage/maria/ma_sp_defs.h create mode 100644 storage/maria/ma_sp_key.c create mode 100644 storage/maria/ma_sp_test.c create mode 100644 storage/maria/ma_static.c create mode 100644 storage/maria/ma_statrec.c create mode 100644 storage/maria/ma_test1.c create mode 100644 storage/maria/ma_test2.c create mode 100644 storage/maria/ma_test3.c create mode 100755 storage/maria/ma_test_all.sh create mode 100644 storage/maria/ma_unique.c create mode 100644 storage/maria/ma_update.c create mode 100644 storage/maria/ma_write.c create mode 100644 storage/maria/maria_chk.c create mode 100644 storage/maria/maria_def.h create mode 100644 storage/maria/maria_ftdump.c create mode 100644 storage/maria/maria_log.c create mode 100644 storage/maria/maria_pack.c create mode 100755 storage/maria/maria_rename.sh create mode 100755 storage/maria/test_pack create mode 100644 storage/myisam/ft_myisam.c diff --git a/.bzrignore b/.bzrignore index 4b6b0411a47..2969dfab09c 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1759,3 +1759,16 @@ vio/viotest.cpp zlib/*.ds? zlib/*.vcproj mysql-test/r/*.log +storage/maria/ma_test1 +storage/maria/ma_test2 +storage/maria/ma_test3 +storage/maria/maria_ftdump +storage/maria/ma_sp_test +storage/maria/ma_rt_test +storage/maria/maria_log +storage/maria/maria_pack +storage/maria/maria_chk +storage/maria/ma_test_all +storage/maria/maria.log +storage/maria/*.MAD +storage/maria/*.MAI diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index a6e18e26bfc..035f297be7e 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -84,7 +84,7 @@ debug_extra_cflags="-O1 -Wuninitialized" base_cxxflags="-felide-constructors -fno-exceptions -fno-rtti" amd64_cxxflags="" # If dropping '--with-big-tables', add here "-DBIG_TABLES" -base_configs="$prefix_configs --enable-assembler --with-extra-charsets=complex --enable-thread-safe-client --with-readline --with-big-tables" +base_configs="$prefix_configs --enable-assembler --with-extra-charsets=complex --enable-thread-safe-client --with-readline --with-big-tables --with-maria-storage-engine" static_link="--with-mysqld-ldflags=-all-static --with-client-ldflags=-all-static" amd64_configs="" alpha_configs="" # Not used yet diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit index ce5e03b0ac9..d6d678a7c22 100755 --- a/BitKeeper/triggers/post-commit +++ b/BitKeeper/triggers/post-commit @@ -5,7 +5,7 @@ FROM=$USER@mysql.com COMMITS=commits@lists.mysql.com DOCS=docs-commit@mysql.com LIMIT=10000 -VERSION="5.1" +VERSION="maria" if [ "$REAL_EMAIL" = "" ] then @@ -66,51 +66,6 @@ EOF bk cset -r+ -d ) | $SENDMAIL -t -#++ -# commits@ mail -#-- - echo "Notifying commits list at $COMMITS" - ( - cat < -From: $FROM -To: $COMMITS -Subject: bk commit into $VERSION tree ($CHANGESET)$BS -X-CSetKey: <$CSETKEY> -$BH -Below is the list of changes that have just been committed into a local -$VERSION repository of $USER. When $USER does a push these changes will -be propagated to the main repository and, within 24 hours after the -push, to the public repository. -For information on how to access the public repository -see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) | head -n $LIMIT | $SENDMAIL -t - -#++ -# docs-commit@ mail -# Picks up anything under the Docs subdirectory (relevant for docs team). -#-- - bk changes -v -r+ | grep -q " Docs/" - if [ $? -eq 0 ] - then - echo "Notifying docs list at $DOCS" - ( - cat < -From: $FROM -To: $DOCS -Subject: bk commit - $VERSION tree (Manual) ($CHANGESET)$BS - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) | $SENDMAIL -t - fi - else echo "commit failed because '$BK_STATUS', you may need to re-clone..." fi diff --git a/config/ac-macros/ha_maria.m4 b/config/ac-macros/ha_maria.m4 new file mode 100644 index 00000000000..56ec8720a09 --- /dev/null +++ b/config/ac-macros/ha_maria.m4 @@ -0,0 +1,29 @@ +dnl --------------------------------------------------------------------------- +dnl Macro: MYSQL_CHECK_MARIA +dnl Sets HAVE_MARIA_DB if --with-maria-storage-engine is used +dnl --------------------------------------------------------------------------- +AC_DEFUN([MYSQL_CHECK_MARIA], [ + AC_ARG_WITH([maria-storage-engine], + [ + --with-maria-storage-engine + Enable the Maria Storage Engine], + [mariadb="$withval"], + [mariadb=no]) + AC_MSG_CHECKING([for Maria storage engine]) + + case "$mariadb" in + yes ) + AC_DEFINE([HAVE_MARIA_DB], [1], [Builds Maria Storage Engine]) + AC_MSG_RESULT([yes]) + [mariadb=yes] + ;; + * ) + AC_MSG_RESULT([no]) + [mariadb=no] + ;; + esac + +]) +dnl --------------------------------------------------------------------------- +dnl END OF MYSQL_CHECK_MARIA SECTION +dnl --------------------------------------------------------------------------- diff --git a/configure.in b/configure.in index 0253a3a6771..03c68d65137 100644 --- a/configure.in +++ b/configure.in @@ -2483,6 +2483,10 @@ MYSQL_STORAGE_ENGINE(ndbcluster,,ndbcluster,,,,storage/ndb,,,[ MYSQL_SETUP_NDBCLUSTER ]) MYSQL_STORAGE_ENGINE(partition,,partition) +MYSQL_STORAGE_ENGINE(maria,,,,,,storage/maria,, + \$(top_builddir)/storage/maria/libmaria.a, [ + AC_CONFIG_FILES(storage/maria/Makefile) +]) # If we have threads generate some library functions and test programs sql_server_dirs= diff --git a/include/ft_global.h b/include/ft_global.h index 8f02e48f61d..494e89af8aa 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -66,6 +66,17 @@ void ft_free_stopwords(void); FT_INFO *ft_init_search(uint,void *, uint, byte *, uint,CHARSET_INFO *, byte *); my_bool ft_boolean_check_syntax_string(const byte *); +/* Internal symbols for fulltext between maria and MyISAM */ + +#define HA_FT_WTYPE HA_KEYTYPE_FLOAT +#define HA_FT_WLEN 4 +#define FT_SEGS 2 + +#define ft_sintXkorr(A) mi_sint4korr(A) +#define ft_intXstore(T,A) mi_int4store(T,A) + +extern const HA_KEYSEG ft_keysegs[FT_SEGS]; + #ifdef __cplusplus } #endif diff --git a/include/keycache.h b/include/keycache.h index 9fe1cce5da5..ba5ef1c3560 100644 --- a/include/keycache.h +++ b/include/keycache.h @@ -128,7 +128,8 @@ extern void end_key_cache(KEY_CACHE *keycache, my_bool cleanup); /* Functions to handle multiple key caches */ extern my_bool multi_keycache_init(void); extern void multi_keycache_free(void); -extern KEY_CACHE *multi_key_cache_search(byte *key, uint length); +extern KEY_CACHE *multi_key_cache_search(byte *key, uint length, + KEY_CACHE *def); extern my_bool multi_key_cache_set(const byte *key, uint length, KEY_CACHE *key_cache); extern void multi_key_cache_change(KEY_CACHE *old_data, diff --git a/include/maria.h b/include/maria.h new file mode 100644 index 00000000000..c7586143fe7 --- /dev/null +++ b/include/maria.h @@ -0,0 +1,418 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file should be included when using maria_funktions */ + +#ifndef _maria_h +#define _maria_h +#ifdef __cplusplus +extern "C" { +#endif +#ifndef _my_base_h +#include +#endif +#ifndef _m_ctype_h +#include +#endif +#ifndef _keycache_h +#include "keycache.h" +#endif +#include "my_handler.h" +#include "ft_global.h" +#include +#include + +/* + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details +*/ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MARIA_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ +#else +#define MARIA_MAX_KEY MAX_INDEXES /* Max allowed keys */ +#endif + +#define MARIA_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ +#define MARIA_NAME_IEXT ".MAI" +#define MARIA_NAME_DEXT ".MAD" +/* Max extra space to use when sorting keys */ +#define MARIA_MAX_TEMP_LENGTH 2*1024L*1024L*1024L +/* Possible values for maria_block_size (must be power of 2) */ +#define MARIA_KEY_BLOCK_LENGTH 8192 /* default key block length */ +#define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ +#define MARIA_MAX_KEY_BLOCK_LENGTH 32768 +#define maria_portable_sizeof_char_ptr 8 + +/* + In the following macros '_keyno_' is 0 .. keys-1. + If there can be more keys than bits in the key_map, the highest bit + is for all upper keys. They cannot be switched individually. + This means that clearing of high keys is ignored, setting one high key + sets all high keys. +*/ +#define MARIA_KEYMAP_BITS (8 * SIZEOF_LONG_LONG) +#define MARIA_KEYMAP_HIGH_MASK (ULL(1) << (MARIA_KEYMAP_BITS - 1)) +#define maria_get_mask_all_keys_active(_keys_) \ + (((_keys_) < MARIA_KEYMAP_BITS) ? \ + ((ULL(1) << (_keys_)) - ULL(1)) : \ + (~ ULL(0))) +#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS +#define maria_is_key_active(_keymap_,_keyno_) \ + (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + test((_keymap_) & (ULL(1) << (_keyno_))) : \ + test((_keymap_) & MARIA_KEYMAP_HIGH_MASK)) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + (ULL(1) << (_keyno_)) : \ + MARIA_KEYMAP_HIGH_MASK) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + (~ (ULL(1) << (_keyno_))) : \ + (~ (ULL(0))) /*ignore*/ ) +#else +#define maria_is_key_active(_keymap_,_keyno_) \ + test((_keymap_) & (ULL(1) << (_keyno_))) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (ULL(1) << (_keyno_)) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (~ (ULL(1) << (_keyno_))) +#endif +#define maria_is_any_key_active(_keymap_) \ + test((_keymap_)) +#define maria_is_all_keys_active(_keymap_,_keys_) \ + ((_keymap_) == maria_get_mask_all_keys_active(_keys_)) +#define maria_set_all_keys_active(_keymap_,_keys_) \ + (_keymap_)= maria_get_mask_all_keys_active(_keys_) +#define maria_clear_all_keys_active(_keymap_) \ + (_keymap_)= 0 +#define maria_intersect_keys_active(_to_,_from_) \ + (_to_)&= (_from_) +#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \ + ((_keymap1_) & (_keymap2_) & \ + maria_get_mask_all_keys_active(_keys_)) +#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \ + (_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \ + (_from_)) + + /* Param to/from maria_info */ + +typedef struct st_maria_isaminfo /* Struct from h_info */ +{ + ha_rows records; /* Records in database */ + ha_rows deleted; /* Deleted records in database */ + my_off_t recpos; /* Pos for last used record */ + my_off_t newrecpos; /* Pos if we write new record */ + my_off_t dupp_key_pos; /* Position to record with dup key */ + my_off_t data_file_length; /* Length of data file */ + my_off_t max_data_file_length, index_file_length; + my_off_t max_index_file_length, delete_length; + ulong reclength; /* Recordlength */ + ulong mean_reclength; /* Mean recordlength (if packed) */ + ulonglong auto_increment; + ulonglong key_map; /* Which keys are used */ + char *data_file_name, *index_file_name; + uint keys; /* Number of keys in use */ + uint options; /* HA_OPTION_... used */ + int errkey, /* With key was dupplicated on err */ + sortkey; /* clustered by this key */ + File filenr; /* (uniq) filenr for datafile */ + time_t create_time; /* When table was created */ + time_t check_time; + time_t update_time; + uint reflength; + ulong record_offset; + ulong *rec_per_key; /* for sql optimizing */ +} MARIA_INFO; + + +typedef struct st_maria_create_info +{ + const char *index_file_name, *data_file_name; /* If using symlinks */ + ha_rows max_rows; + ha_rows reloc_rows; + ulonglong auto_increment; + ulonglong data_file_length; + ulonglong key_file_length; + uint old_options; + uint8 language; + my_bool with_auto_increment; +} MARIA_CREATE_INFO; + +struct st_maria_info; /* For referense */ +struct st_maria_share; +typedef struct st_maria_info MARIA_HA; +struct st_maria_s_param; + +typedef struct st_maria_keydef /* Key definition with open & info */ +{ + struct st_maria_share *share; /* Pointer to base (set in open) */ + uint16 keysegs; /* Number of key-segment */ + uint16 flag; /* NOSAME, PACK_USED */ + + uint8 key_alg; /* BTREE, RTREE */ + uint16 block_length; /* Length of keyblock (auto) */ + uint16 underflow_block_length; /* When to execute underflow */ + uint16 keylength; /* Tot length of keyparts (auto) */ + uint16 minlength; /* min length of (packed) key (auto) */ + uint16 maxlength; /* max length of (packed) key (auto) */ + uint16 block_size; /* block_size (auto) */ + uint32 version; /* For concurrent read/write */ + uint32 ftparser_nr; /* distinct ftparser number */ + + HA_KEYSEG *seg, *end; + struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */ + int(*bin_search) (struct st_maria_info *info, + struct st_maria_keydef *keyinfo, uchar *page, uchar *key, + uint key_len, uint comp_flag, uchar **ret_pos, + uchar *buff, my_bool *was_last_key); + uint(*get_key) (struct st_maria_keydef *keyinfo, uint nod_flag, + uchar **page, uchar *key); + int(*pack_key) (struct st_maria_keydef *keyinfo, uint nod_flag, + uchar *next_key, uchar *org_key, uchar *prev_key, + uchar *key, struct st_maria_s_param *s_temp); + void(*store_key) (struct st_maria_keydef *keyinfo, uchar *key_pos, + struct st_maria_s_param *s_temp); + int(*ck_insert) (struct st_maria_info *inf, uint k_nr, uchar *k, uint klen); + int(*ck_delete) (struct st_maria_info *inf, uint k_nr, uchar *k, uint klen); +} MARIA_KEYDEF; + + +#define MARIA_UNIQUE_HASH_LENGTH 4 + +typedef struct st_maria_unique_def /* Segment definition of unique */ +{ + uint16 keysegs; /* Number of key-segment */ + uchar key; /* Mapped to which key */ + uint8 null_are_equal; + HA_KEYSEG *seg, *end; +} MARIA_UNIQUEDEF; + +typedef struct st_maria_decode_tree /* Decode huff-table */ +{ + uint16 *table; + uint quick_table_bits; + byte *intervalls; +} MARIA_DECODE_TREE; + + +struct st_maria_bit_buff; + +/* + Note that null markers should always be first in a row ! + When creating a column, one should only specify: + type, length, null_bit and null_pos +*/ + +typedef struct st_maria_columndef /* column information */ +{ + int16 type; /* en_fieldtype */ + uint16 length; /* length of field */ + uint32 offset; /* Offset to position in row */ + uint8 null_bit; /* If column may be 0 */ + uint16 null_pos; /* position for null marker */ + +#ifndef NOT_PACKED_DATABASES + void(*unpack) (struct st_maria_columndef *rec, + struct st_maria_bit_buff *buff, + uchar *start, uchar *end); + enum en_fieldtype base_type; + uint space_length_bits, pack_type; + MARIA_DECODE_TREE *huff_tree; +#endif +} MARIA_COLUMNDEF; + + +extern my_string maria_log_filename; /* Name of logfile */ +extern ulong maria_block_size; +extern ulong maria_concurrent_insert; +extern my_bool maria_flush, maria_delay_key_write, maria_single_user; +extern my_off_t maria_max_temp_length; +extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size; +extern KEY_CACHE maria_key_cache_var, *maria_key_cache; + + + /* Prototypes for maria-functions */ + +extern int maria_init(void); +extern void maria_end(void); +extern int maria_close(struct st_maria_info *file); +extern int maria_delete(struct st_maria_info *file, const byte *buff); +extern struct st_maria_info *maria_open(const char *name, int mode, + uint wait_if_locked); +extern int maria_panic(enum ha_panic_function function); +extern int maria_rfirst(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rkey(struct st_maria_info *file, byte *buf, int inx, + const byte *key, + uint key_len, enum ha_rkey_function search_flag); +extern int maria_rlast(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext_same(struct st_maria_info *info, byte *buf); +extern int maria_rprev(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rrnd(struct st_maria_info *file, byte *buf, my_off_t pos); +extern int maria_scan_init(struct st_maria_info *file); +extern int maria_scan(struct st_maria_info *file, byte *buf); +extern int maria_rsame(struct st_maria_info *file, byte *record, int inx); +extern int maria_rsame_with_pos(struct st_maria_info *file, byte *record, + int inx, my_off_t pos); +extern int maria_update(struct st_maria_info *file, const byte *old, + byte *new_record); +extern int maria_write(struct st_maria_info *file, byte *buff); +extern my_off_t maria_position(struct st_maria_info *file); +extern int maria_status(struct st_maria_info *info, MARIA_INFO *x, uint flag); +extern int maria_lock_database(struct st_maria_info *file, int lock_type); +extern int maria_create(const char *name, uint keys, MARIA_KEYDEF *keydef, + uint columns, MARIA_COLUMNDEF *columndef, + uint uniques, MARIA_UNIQUEDEF *uniquedef, + MARIA_CREATE_INFO *create_info, uint flags); +extern int maria_delete_table(const char *name); +extern int maria_rename(const char *from, const char *to); +extern int maria_extra(struct st_maria_info *file, + enum ha_extra_function function, void *extra_arg); +extern ha_rows maria_records_in_range(struct st_maria_info *info, int inx, + key_range *min_key, key_range *max_key); +extern int maria_logging(int activate_log); +extern int maria_is_changed(struct st_maria_info *info); +extern int maria_delete_all_rows(struct st_maria_info *info); +extern uint maria_get_pointer_length(ulonglong file_length, uint def); + + +/* this is used to pass to mysql_mariachk_table */ + +#define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */ +#define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */ + +typedef struct st_maria_sort_info +{ +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MARIA_HA *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; + + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; + myf myf_rw; + enum data_file_type new_data_file_type; +} MARIA_SORT_INFO; + + +typedef struct st_maria_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + + MARIA_KEYDEF *keyinfo; + MARIA_SORT_INFO *sort_info; + HA_KEYSEG *seg; + uchar **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + char *record; + MY_TMPDIR *tmpdir; + + /* + The next two are used to collect statistics, see maria_update_key_parts for + description. + */ + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; + + my_off_t pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + + int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *); + int (*key_read)(struct st_maria_sort_param *,void *); + int (*key_write)(struct st_maria_sort_param *, const void *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_maria_sort_param *, register uchar **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,char *, + uint, uint); +} MARIA_SORT_PARAM; + + +/* functions in maria_check */ +void mariachk_init(HA_CHECK *param); +int maria_chk_status(HA_CHECK *param, MARIA_HA *info); +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag); +int maria_chk_size(HA_CHECK *param, MARIA_HA *info); +int maria_chk_key(HA_CHECK *param, MARIA_HA *info); +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, int extend); +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick); +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, + my_string name); +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_change_to_newfile(const char *filename, const char *old_ext, + const char *new_ext, uint raid_chunks, myf myflags); +int maria_lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, + const char *filetype, const char *filename); +void maria_lock_memory(HA_CHECK *param); +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update); +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records); +int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start, + my_off_t length, const char *type); +int maria_movepoint(MARIA_HA *info, byte *record, my_off_t oldpos, + my_off_t newpos, uint prot_key); +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile); +int maria_test_if_almost_full(MARIA_HA *info); +int maria_recreate_table(HA_CHECK *param, MARIA_HA ** org_info, char *filename); +int maria_disable_indexes(MARIA_HA *info); +int maria_enable_indexes(MARIA_HA *info); +int maria_indexes_are_disabled(MARIA_HA *info); +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows); +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map, + my_bool force); + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows); +void maria_flush_bulk_insert(MARIA_HA *info, uint inx); +void maria_end_bulk_insert(MARIA_HA *info); +int maria_assign_to_key_cache(MARIA_HA *info, ulonglong key_map, + KEY_CACHE *key_cache); +void maria_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache); +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves); + +/* fulltext functions */ +FT_INFO *maria_ft_init_search(uint,void *, uint, byte *, uint, + CHARSET_INFO *, byte *); + +/* 'Almost-internal' Maria functions */ + +void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, + my_bool repair); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/my_base.h b/include/my_base.h index b9a806cc02f..5378f4f470b 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -15,7 +15,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* This file includes constants used with all databases */ -/* Author: Michael Widenius */ #ifndef _my_base_h #define _my_base_h @@ -487,4 +486,7 @@ typedef ulong ha_rows; #define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 1 :2) +/* invalidator function reference for Query Cache */ +typedef void (* invalidator_by_filename)(const char * filename); + #endif /* _my_base_h */ diff --git a/include/my_handler.h b/include/my_handler.h index d531e0fb3e1..9b086036389 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -18,11 +18,30 @@ #ifndef _my_handler_h #define _my_handler_h -#include "my_global.h" -#include "my_base.h" -#include "m_ctype.h" #include "myisampack.h" +/* + There is a hard limit for the maximum number of keys as there are only + 8 bits in the index file header for the number of keys in a table. + This means that 0..255 keys can exist for a table. The idea of + HA_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on + a MyISAM table for which one has more keys than MyISAM is normally + compiled for. If you don't have this, you will get a core dump when + running myisamchk compiled for 128 keys on a table with 255 keys. +*/ + +#define HA_MAX_POSSIBLE_KEY 255 /* For myisamchk */ +/* + The following defines can be increased if necessary. + But beware the dependency of HA_MAX_POSSIBLE_KEY_BUFF and HA_MAX_KEY_LENGTH. +*/ + +#define HA_MAX_KEY_LENGTH 1000 /* Max length in bytes */ +#define HA_MAX_KEY_SEG 16 /* Max segments for key */ + +#define HA_MAX_POSSIBLE_KEY_BUFF (HA_MAX_KEY_LENGTH + 24+ 6+6) +#define HA_MAX_KEY_BUFF (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8) + typedef struct st_HA_KEYSEG /* Key-portion */ { CHARSET_INFO *charset; @@ -82,7 +101,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */ #define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \ set_rec_bits(0, bit_ptr, bit_ofs, bit_len) -extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , +extern int ha_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , my_bool, my_bool); extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, diff --git a/include/myisam.h b/include/myisam.h index 5116f48eeb5..b5f29741be0 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -32,33 +32,19 @@ extern "C" { #include "keycache.h" #endif #include "my_handler.h" +#include #include /* - There is a hard limit for the maximum number of keys as there are only - 8 bits in the index file header for the number of keys in a table. - This means that 0..255 keys can exist for a table. The idea of - MI_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on - a MyISAM table for which one has more keys than MyISAM is normally - compiled for. If you don't have this, you will get a core dump when - running myisamchk compiled for 128 keys on a table with 255 keys. + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details */ -#define MI_MAX_POSSIBLE_KEY 255 /* For myisam_chk */ -#if MAX_INDEXES > MI_MAX_POSSIBLE_KEY -#define MI_MAX_KEY MI_MAX_POSSIBLE_KEY /* Max allowed keys */ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MI_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ #else #define MI_MAX_KEY MAX_INDEXES /* Max allowed keys */ #endif -#define MI_MAX_POSSIBLE_KEY_BUFF (1024+6+6) /* For myisam_chk */ -/* - The following defines can be increased if necessary. - But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH. -*/ -#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */ -#define MI_MAX_KEY_SEG 16 /* Max segments for key */ - -#define MI_MAX_KEY_BUFF (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8) #define MI_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ #define MI_NAME_IEXT ".MYI" #define MI_NAME_DEXT ".MYD" @@ -257,9 +243,6 @@ typedef struct st_columndef /* column information */ #endif } MI_COLUMNDEF; -/* invalidator function reference for Query Cache */ -typedef void (* invalidator_by_filename)(const char * filename); - extern my_string myisam_log_filename; /* Name of logfile */ extern ulong myisam_block_size; extern ulong myisam_concurrent_insert; @@ -311,195 +294,105 @@ extern int mi_delete_all_rows(struct st_myisam_info *info); extern ulong _mi_calc_blob_length(uint length , const byte *pos); extern uint mi_get_pointer_length(ulonglong file_length, uint def); -/* this is used to pass to mysql_myisamchk_table -- by Sasha Pachev */ +/* this is used to pass to mysql_myisamchk_table */ #define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */ #define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */ -/* - Definitions needed for myisamchk.c - - Entries marked as "QQ to be removed" are NOT used to - pass check/repair options to mi_check.c. They are used - internally by myisamchk.c or/and ha_myisam.cc and should NOT - be stored together with other flags. They should be removed - from the following list to make addition of new flags possible. -*/ - -#define T_AUTO_INC 1 -#define T_AUTO_REPAIR 2 /* QQ to be removed */ -#define T_BACKUP_DATA 4 -#define T_CALC_CHECKSUM 8 -#define T_CHECK 16 /* QQ to be removed */ -#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ -#define T_CREATE_MISSING_KEYS 64 -#define T_DESCRIPT 128 -#define T_DONT_CHECK_CHECKSUM 256 -#define T_EXTEND 512 -#define T_FAST (1L << 10) /* QQ to be removed */ -#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ -#define T_FORCE_UNIQUENESS (1L << 12) -#define T_INFO (1L << 13) -#define T_MEDIUM (1L << 14) -#define T_QUICK (1L << 15) /* QQ to be removed */ -#define T_READONLY (1L << 16) /* QQ to be removed */ -#define T_REP (1L << 17) -#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ -#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ -#define T_RETRY_WITHOUT_QUICK (1L << 20) -#define T_SAFE_REPAIR (1L << 21) -#define T_SILENT (1L << 22) -#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ -#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ -#define T_STATISTICS (1L << 25) -#define T_UNPACK (1L << 26) -#define T_UPDATE_STATE (1L << 27) -#define T_VERBOSE (1L << 28) -#define T_VERY_SILENT (1L << 29) -#define T_WAIT_FOREVER (1L << 30) -#define T_WRITE_LOOP ((ulong) 1L << 31) - -#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) - -/* - Flags used by myisamchk.c or/and ha_myisam.cc that are NOT passed - to mi_check.c follows: -*/ - -#define TT_USEFRM 1 -#define TT_FOR_UPGRADE 2 - -#define O_NEW_INDEX 1 /* Bits set in out_flag */ -#define O_NEW_DATA 2 -#define O_DATA_LOST 4 - -/* these struct is used by my_check to tell it what to do */ - -typedef struct st_sort_key_blocks /* Used when sorting */ +typedef struct st_sort_info { - uchar *buff,*end_pos; - uchar lastkey[MI_MAX_POSSIBLE_KEY_BUFF]; - uint last_length; - int inited; -} SORT_KEY_BLOCKS; +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MI_INFO *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; - -/* - MyISAM supports several statistics collection methods. Currently statistics - collection method is not stored in MyISAM file and has to be specified for - each table analyze/repair operation in MI_CHECK::stats_method. -*/ - -typedef enum -{ - /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ - MI_STATS_METHOD_NULLS_NOT_EQUAL, - /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ - MI_STATS_METHOD_NULLS_EQUAL, - /* Ignore NULLs - count only tuples without NULLs in the index components */ - MI_STATS_METHOD_IGNORE_NULLS -} enum_mi_stats_method; - -typedef struct st_mi_check_param -{ - ulonglong auto_increment_value; - ulonglong max_data_file_length; - ulonglong keys_in_use; - ulonglong max_record_length; - my_off_t search_after_block; - my_off_t new_file_pos,key_file_blocks; - my_off_t keydata,totaldata,key_blocks,start_check_pos; - ha_rows total_records,total_deleted; - ha_checksum record_checksum,glob_crc; - ulong use_buffers,read_buffer_length,write_buffer_length, - sort_buffer_length,sort_key_blocks; - uint out_flag,warning_printed,error_printed,verbose; - uint opt_sort_key,total_files,max_level; - uint testflag, key_cache_block_size; - uint8 language; - my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort, calc_checksum; - char temp_filename[FN_REFLEN],*isam_file_name; - MY_TMPDIR *tmpdir; - int tmpfile_createflag; + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; myf myf_rw; - IO_CACHE read_cache; + enum data_file_type new_data_file_type; +} MI_SORT_INFO; + + +typedef struct st_mi_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + MI_KEYDEF *keyinfo; + MI_SORT_INFO *sort_info; + HA_KEYSEG *seg; + uchar **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + char *record; + MY_TMPDIR *tmpdir; + /* The next two are used to collect statistics, see update_key_parts for description. */ - ulonglong unique_count[MI_MAX_KEY_SEG+1]; - ulonglong notnull_count[MI_MAX_KEY_SEG+1]; - - ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; - ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; - void *thd; - const char *db_name, *table_name; - const char *op_name; - enum_mi_stats_method stats_method; -} MI_CHECK; + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; -typedef struct st_sort_ft_buf -{ - uchar *buf, *end; - int count; - uchar lastkey[MI_MAX_KEY_BUFF]; -} SORT_FT_BUF; + my_off_t pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + + int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); + int (*key_read)(struct st_mi_sort_param *,void *); + int (*key_write)(struct st_mi_sort_param *, const void *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, + uint, uint); +} MI_SORT_PARAM; -typedef struct st_sort_info -{ - my_off_t filelength,dupp,buff_length; - ha_rows max_records; - uint current_key, total_keys; - myf myf_rw; - enum data_file_type new_data_file_type; - MI_INFO *info; - MI_CHECK *param; - char *buff; - SORT_KEY_BLOCKS *key_block,*key_block_end; - SORT_FT_BUF *ft_buf; - /* sync things */ - uint got_error, threads_running; -#ifdef THREAD - pthread_mutex_t mutex; - pthread_cond_t cond; -#endif -} SORT_INFO; /* functions in mi_check */ -void myisamchk_init(MI_CHECK *param); -int chk_status(MI_CHECK *param, MI_INFO *info); -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag); -int chk_size(MI_CHECK *param, MI_INFO *info); -int chk_key(MI_CHECK *param, MI_INFO *info); -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend); -int mi_repair(MI_CHECK *param, register MI_INFO *info, +void myisamchk_init(HA_CHECK *param); +int chk_status(HA_CHECK *param, MI_INFO *info); +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag); +int chk_size(HA_CHECK *param, MI_INFO *info); +int chk_key(HA_CHECK *param, MI_INFO *info); +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend); +int mi_repair(HA_CHECK *param, register MI_INFO *info, my_string name, int rep_quick); -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name); -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name); +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); int change_to_newfile(const char * filename, const char * old_ext, const char * new_ext, uint raid_chunks, myf myflags); -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename); -void lock_memory(MI_CHECK *param); -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void lock_memory(HA_CHECK *param); +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair); -int update_state_info(MI_CHECK *param, MI_INFO *info,uint update); +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update); void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong *notnull, ulonglong records); -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type); int movepoint(MI_INFO *info,byte *record,my_off_t oldpos, my_off_t newpos, uint prot_key); -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); int test_if_almost_full(MI_INFO *info); -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename); +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename); void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows); my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, ulonglong key_map, my_bool force); @@ -513,6 +406,13 @@ void mi_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); +int flush_pending_blocks(MI_SORT_PARAM *param); +int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); +int thr_write_keys(MI_SORT_PARAM *sort_param); +int sort_write_record(MI_SORT_PARAM *sort_param); +int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); + #ifdef __cplusplus } #endif diff --git a/include/myisamchk.h b/include/myisamchk.h new file mode 100644 index 00000000000..442fe5f7ece --- /dev/null +++ b/include/myisamchk.h @@ -0,0 +1,160 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Definitions needed for myisamchk/mariachk.c */ + +/* + Entries marked as "QQ to be removed" are NOT used to + pass check/repair options to xxx_check.c. They are used + internally by xxxchk.c or/and ha_xxxx.cc and should NOT + be stored together with other flags. They should be removed + from the following list to make addition of new flags possible. +*/ + +#ifndef _myisamchk_h +#define _myisamchk_h + +#define T_AUTO_INC 1 +#define T_AUTO_REPAIR 2 /* QQ to be removed */ +#define T_BACKUP_DATA 4 +#define T_CALC_CHECKSUM 8 +#define T_CHECK 16 /* QQ to be removed */ +#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ +#define T_CREATE_MISSING_KEYS 64 +#define T_DESCRIPT 128 +#define T_DONT_CHECK_CHECKSUM 256 +#define T_EXTEND 512 +#define T_FAST (1L << 10) /* QQ to be removed */ +#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ +#define T_FORCE_UNIQUENESS (1L << 12) +#define T_INFO (1L << 13) +#define T_MEDIUM (1L << 14) +#define T_QUICK (1L << 15) /* QQ to be removed */ +#define T_READONLY (1L << 16) /* QQ to be removed */ +#define T_REP (1L << 17) +#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ +#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ +#define T_RETRY_WITHOUT_QUICK (1L << 20) +#define T_SAFE_REPAIR (1L << 21) +#define T_SILENT (1L << 22) +#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ +#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ +#define T_STATISTICS (1L << 25) +#define T_UNPACK (1L << 26) +#define T_UPDATE_STATE (1L << 27) +#define T_VERBOSE (1L << 28) +#define T_VERY_SILENT (1L << 29) +#define T_WAIT_FOREVER (1L << 30) +#define T_WRITE_LOOP ((ulong) 1L << 31) + +#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) + +/* + Flags used by xxxxchk.c or/and ha_xxxx.cc that are NOT passed + to xxxcheck.c follows: +*/ + +#define TT_USEFRM 1 +#define TT_FOR_UPGRADE 2 + +#define O_NEW_INDEX 1 /* Bits set in out_flag */ +#define O_NEW_DATA 2 +#define O_DATA_LOST 4 + +typedef struct st_sort_key_blocks /* Used when sorting */ +{ + uchar *buff, *end_pos; + uchar lastkey[HA_MAX_POSSIBLE_KEY_BUFF]; + uint last_length; + int inited; +} SORT_KEY_BLOCKS; + + +/* + MARIA/MYISAM supports several statistics collection + methods. Currently statistics collection method is not stored in + MARIA file and has to be specified for each table analyze/repair + operation in MI_CHECK::stats_method. +*/ + +typedef enum +{ + /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ + MI_STATS_METHOD_NULLS_NOT_EQUAL, + /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ + MI_STATS_METHOD_NULLS_EQUAL, + /* Ignore NULLs - count only tuples without NULLs in the index components */ + MI_STATS_METHOD_IGNORE_NULLS +} enum_handler_stats_method; + + +typedef struct st_handler_check_param +{ + char *isam_file_name; + MY_TMPDIR *tmpdir; + void *thd; + const char *db_name, *table_name, *op_name; + ulonglong auto_increment_value; + ulonglong max_data_file_length; + ulonglong keys_in_use; + ulonglong max_record_length; + /* + The next two are used to collect statistics, see update_key_parts for + description. + */ + ulonglong unique_count[HA_MAX_KEY_SEG + 1]; + ulonglong notnull_count[HA_MAX_KEY_SEG + 1]; + + my_off_t search_after_block; + my_off_t new_file_pos, key_file_blocks; + my_off_t keydata, totaldata, key_blocks, start_check_pos; + ha_rows total_records, total_deleted; + ha_checksum record_checksum, glob_crc; + ha_checksum key_crc[HA_MAX_POSSIBLE_KEY]; + ulong use_buffers, read_buffer_length, write_buffer_length; + ulong sort_buffer_length, sort_key_blocks; + ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY]; + uint out_flag, warning_printed, error_printed, verbose; + uint opt_sort_key, total_files, max_level; + uint testflag, key_cache_block_size; + int tmpfile_createflag; + myf myf_rw; + uint8 language; + my_bool using_global_keycache, opt_lock_memory, opt_follow_links; + my_bool retry_repair, force_sort, calc_checksum; + char temp_filename[FN_REFLEN]; + IO_CACHE read_cache; + enum_handler_stats_method stats_method; +} HA_CHECK; + + +typedef struct st_sort_ftbuf +{ + uchar *buf, *end; + int count; + uchar lastkey[HA_MAX_KEY_BUFF]; +} SORT_FT_BUF; + + +typedef struct st_buffpek { + my_off_t file_pos; /* Where we are in the sort file */ + uchar *base,*key; /* Key pointers */ + ha_rows count; /* Number of rows in table */ + ulong mem_count; /* numbers of keys in memory */ + ulong max_keys; /* Max keys in buffert */ +} BUFFPEK; + +#endif /* _myisamchk_h */ diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index b5d437918ae..e4430f23a4c 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -73,8 +73,8 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) EXTRA_libmysqld_a_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ ha_blackhole.cc ha_federated.cc ha_ndbcluster.cc \ - ha_ndbcluster_binlog.cc \ - ha_partition.cc + ha_ndbcluster_binlog.cc ha_partition.cc \ + ha_maria.cc libmysqld_a_DEPENDENCIES= @mysql_se_objs@ libmysqld_a_SOURCES= diff --git a/mysql-test/include/have_maria.inc b/mysql-test/include/have_maria.inc new file mode 100644 index 00000000000..955e2305ca5 --- /dev/null +++ b/mysql-test/include/have_maria.inc @@ -0,0 +1,4 @@ +-- require r/have_maria.require +disable_query_log; +show variables like "have_maria"; +enable_query_log; diff --git a/mysql-test/r/have_maria.require b/mysql-test/r/have_maria.require new file mode 100644 index 00000000000..02988af6976 --- /dev/null +++ b/mysql-test/r/have_maria.require @@ -0,0 +1,2 @@ +Variable_name Value +have_maria YES diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result new file mode 100644 index 00000000000..3b360873c36 --- /dev/null +++ b/mysql-test/r/maria.result @@ -0,0 +1,1417 @@ +drop table if exists t1,t2; +SET SQL_WARNINGS=1; +CREATE TABLE t1 ( +STRING_DATA char(255) default NULL, +KEY string_data (STRING_DATA) +) ENGINE=MARIA; +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)) engine=maria; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +delete from t1 where (a & 1); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)) engine=maria; +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status Table is already up to date +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +drop table t1; +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=maria; +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 4 Using index +explain select b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 4 NULL 4 Using index +explain select a,b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a,b from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain select a,b,c from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +drop table t1; +CREATE TABLE t1 (a INT) engine=maria; +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +DROP TABLE t1; +create table t1 ( t1 char(255), key(t1(250))) engine=maria; +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic engine=maria; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +delete from t1 where i8=1; +select i1,i2 from t1; +i1 i2 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE `t1` ( +`post_id` mediumint(8) unsigned NOT NULL auto_increment, +`topic_id` mediumint(8) unsigned NOT NULL default '0', +`post_time` datetime NOT NULL default '0000-00-00 00:00:00', +`post_text` text NOT NULL, +`icon_url` varchar(10) NOT NULL default '', +`sign` tinyint(1) unsigned NOT NULL default '0', +`post_edit` varchar(150) NOT NULL default '', +`poster_login` varchar(35) NOT NULL default '', +`ip` varchar(15) NOT NULL default '', +PRIMARY KEY (`post_id`), +KEY `post_time` (`post_time`), +KEY `ip` (`ip`), +KEY `poster_login` (`poster_login`), +KEY `topic_id` (`topic_id`), +FULLTEXT KEY `post_text` (`post_text`) +) ENGINE=MARIA; +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)) engine=maria; +ERROR 42000: Specified key was too long; max key length is 1000 bytes +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)) engine=maria; +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +DROP TABLE t1; +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)) engine=maria; +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)) engine=maria; +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 b 1 b A 5 NULL NULL YES BTREE +t1 1 c 1 c A 5 NULL NULL YES BTREE +t1 1 a 1 a A 1 NULL NULL BTREE +t1 1 a 2 b A 5 NULL NULL YES BTREE +t1 1 c_2 1 c A 5 NULL NULL YES BTREE +t1 1 c_2 2 a A 5 NULL NULL BTREE +explain select * from t1,t2 where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +explain select * from t1,t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 +explain select * from t1,t2 where t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 2 +1 SIMPLE t1 ref b b 5 test.t2.b 1 Using where +explain select * from t1,t2 force index(c) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +explain select * from t1 where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +explain select * from t1 force index (a) where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 4 NULL 4 Using where +explain select * from t1 where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c,c_2 c 5 const 1 Using where +explain select * from t1 use index() where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where +drop table t1,t2; +create table t1 (a int not null auto_increment primary key, b varchar(255)) engine=maria; +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 ( a text not null, key a (a(20))) engine=maria; +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +select concat(a,'.') from t1 where a='aaa'; +concat(a,'.') +aaa . +aaa. +select concat(a,'.') from t1 where binary a='aaa'; +concat(a,'.') +aaa. +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +concat(a,'.') +bbb. +bbb. +aa. +drop table t1; +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))) engine=maria; +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +a b c +807780 477 165 +drop table t1; +DROP TABLE IF EXISTS t1; +Warnings: +Note 1051 Unknown table 't1' +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)) engine=maria; +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +a +can can +can +can can +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; +create table t1 (a blob) engine=maria; +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . +drop table t1; +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))) engine=maria; +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +concat(b,'.') +a. +b. +c. +update t1 set b='b ' where a=2; +update t1 set b='b ' where a > 1; +ERROR 23000: Duplicate entry 'b ' for key 'b' +insert into t1 (b) values ('b'); +ERROR 23000: Duplicate entry 'b' for key 'b' +select * from t1; +a b +1 a +2 b +3 c +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +a concat(b,'.') +1 a. +3 c. +drop table t1; +create table t1 (a int not null) engine=maria; +create table t2 (a int not null, primary key (a)) engine=maria; +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +a +1 +select distinct t1.a from t1,t2 order by t2.a; +a +1 +select sql_big_result distinct t1.a from t1,t2; +a +1 +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +explain select distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +drop table t1,t2; +create table t1 ( +c1 varchar(32), +key (c1) +) engine=maria; +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +c1 +a +drop table t1; +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)) ENGINE=MARIA; +Got one of the listed errors +create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=maria; +create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=maria; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +Table Checksum +test.t1 2948697075 +test.t2 NULL +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3 extended; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +drop table t1,t2; +create table t1 (a int, key (a)) engine=maria; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE +alter table t1 disable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +create table t2 (a int) engine=maria; +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +insert t1 select * from t2; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 1000 NULL NULL YES BTREE +alter table t1 engine=heap; +alter table t1 disable keys; +Warnings: +Note 1031 Table storage engine for 't1' doesn't have this option +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a NULL 500 NULL NULL YES HASH +drop table t1,t2; +create table t1 ( a tinytext, b char(1), index idx (a(1),b)) engine=maria; +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref idx idx 4 const 1 Using where +select count(*) from t1 where a is null; +count(*) +2 +drop table t1; +create table t1 (c1 int, c2 varchar(4) not null default '', +key(c2(3))) default charset=utf8 engine=maria; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (c1 int) engine=maria; +insert into t1 values (1),(2),(3),(4); +checksum table t1; +Table Checksum +test.t1 149057747 +delete from t1 where c1 = 1; +create table t2 engine=maria as select * from t1; +checksum table t1; +Table Checksum +test.t1 984116287 +checksum table t2; +Table Checksum +test.t2 984116287 +drop table t1, t2; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +create table t1 (a int, key(a)) engine=maria; +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_equal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +drop table t1; +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_ignored +create table t1 ( +a char(3), b char(4), c char(5), d char(6), +key(a,b,c,d) +) engine=maria; +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 2 NULL NULL YES BTREE +t1 1 a 2 b A 4 NULL NULL YES BTREE +t1 1 a 3 c A 4 NULL NULL YES BTREE +t1 1 a 4 d A 4 NULL NULL YES BTREE +delete from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 0 NULL NULL YES BTREE +t1 1 a 2 b A 0 NULL NULL YES BTREE +t1 1 a 3 c A 0 NULL NULL YES BTREE +t1 1 a 4 d A 0 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +drop table t1; +create table t1( +cip INT NOT NULL, +time TIME NOT NULL, +score INT NOT NULL DEFAULT 0, +bob TINYBLOB +) engine=maria; +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), +(6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +drop table t1; +create table t1 ( +id1 int not null auto_increment, +id2 int not null default '0', +t text not null, +primary key (id1), +key x (id2, t(32)) +) engine=maria; +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +count(*) +5 +select count(id1) from t1 where id2 = 10; +count(id1) +5 +drop table t1; +set storage_engine=MARIA; +drop table if exists t1,t2,t3; +--- Testing varchar --- +--- Testing varchar --- +create table t1 (v varchar(10), c char(10), t text); +insert into t1 values('+ ', '+ ', '+ '); +set @a=repeat(' ',20); +insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a)); +Warnings: +Note 1265 Data truncated for column 'v' at row 1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+ *+*+ * +*+ *+*+ * +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t2 like t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t3 select * from t1; +show create table t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify c varchar(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify v char(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify t varchar(10); +Warnings: +Note 1265 Data truncated for column 't' at row 2 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` varchar(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+*+*+ * +*+*+*+ * +drop table t1,t2,t3; +create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`), + KEY `c` (`c`), + KEY `t` (`t`(10)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1; +count(*) +270 +insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1))); +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where c='a'; +count(*) +10 +select count(*) from t1 where t='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where c='a '; +count(*) +10 +select count(*) from t1 where t='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where c like 'a%'; +count(*) +11 +select count(*) from t1 where t like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where c='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c c 11 const # Using where; Using index +explain select count(*) from t1 where t='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range t t 13 NULL # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 13 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +alter table t1 add unique(v); +ERROR 23000: Duplicate entry '{ ' for key 'v_2' +alter table t1 add key(v); +select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a'; +qq +*a*a*a* +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v,v_2 # 13 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(*) from t1 group by c limit 10; +c count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(*) from t1 group by t limit 10; +t count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 303 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 drop key v, add key v (v(30)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`(30)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 33 NULL # Using where +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(600), drop key v, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(600) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +drop table t1; +create table t1 (a char(10), unique (a)); +insert into t1 values ('a '); +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a' for key 'a' +alter table t1 modify a varchar(10); +insert into t1 values ('a '),('a '),('a '),('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +update t1 set a='a ' where a like 'a%'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='abc ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a %'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +drop table t1; +create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`(5)), + KEY `c` (`c`(5)), + KEY `t` (`t`(5)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v char(10) character set utf8); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) CHARACTER SET utf8 DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(10), c char(10)) row_format=fixed; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED +insert into t1 values('a','a'),('a ','a '); +select concat('*',v,'*',c,'*') from t1; +concat('*',v,'*',c,'*') +*a*a* +*a *a* +drop table t1; +create table t1 (v varchar(65530), key(v(10))); +insert into t1 values(repeat('a',65530)); +select length(v) from t1 where v=repeat('a',65530); +length(v) +65530 +drop table t1; +create table t1(a int, b varchar(12), key ba(b, a)); +insert into t1 values (1, 'A'), (20, NULL); +explain select * from t1 where a=20 and b is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref ba ba 20 const,const 1 Using where; Using index +select * from t1 where a=20 and b is null; +a b +20 NULL +drop table t1; +create table t1 (v varchar(65530), key(v)) engine=maria; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +drop table if exists t1; +create table t1 (v varchar(65536)) engine=maria; +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65530) character set utf8) engine=maria; +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext CHARACTER SET utf8 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65535)) engine=maria; +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs +set storage_engine=MyISAM; +create table t1 (a int) engine=maria; +drop table if exists t1; +Warnings: +Error 2 Can't find file: 't1' (errno: 2) +create table t1 (a int) engine=maria; +drop table t1; +Got one of the listed errors +create table t1 (a int) engine=maria; +drop table t1; +Got one of the listed errors +drop table t1; +ERROR 42S02: Unknown table 't1' +set @save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int) engine=maria; +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 values (8),(9); +unlock tables; +insert into t1 values (10),(11),(12); +select * from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int, b varchar(30) default "hello") engine=maria; +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 (a) values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 (a) values (8),(9); +unlock tables; +insert into t1 (a) values (10),(11),(12); +select a from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +set global concurrent_insert=@save_concurrent_insert; +create table t1 (a int, key(a)) engine=maria; +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE +drop table t1; +create table t1 (c1 int) engine=maria pack_keys=0; +create table t2 (c1 int) engine=maria pack_keys=1; +create table t3 (c1 int) engine=maria pack_keys=default; +create table t4 (c1 int) engine=maria pack_keys=2; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1 +drop table t1, t2, t3; diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result new file mode 100644 index 00000000000..a5cb3050dcb --- /dev/null +++ b/mysql-test/r/ps_maria.result @@ -0,0 +1,3137 @@ +use test; +drop table if exists t1, t9 ; +create table t1 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +create table t9 +( +c1 tinyint, c2 smallint, c3 mediumint, c4 int, +c5 integer, c6 bigint, c7 float, c8 double, +c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4), +c13 date, c14 datetime, c15 timestamp, c16 time, +c17 year, c18 tinyint, c19 bool, c20 char, +c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext, +c25 blob, c26 text, c27 mediumblob, c28 mediumtext, +c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'), +c32 set('monday', 'tuesday', 'wednesday'), +primary key(c1) +) engine = 'MARIA' ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +test_sequence +------ simple select tests ------ +prepare stmt1 from ' select * from t9 order by c1 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t9 t9 c1 c1 1 4 1 N 49155 0 63 +def test t9 t9 c2 c2 2 6 1 Y 32768 0 63 +def test t9 t9 c3 c3 9 9 1 Y 32768 0 63 +def test t9 t9 c4 c4 3 11 1 Y 32768 0 63 +def test t9 t9 c5 c5 3 11 1 Y 32768 0 63 +def test t9 t9 c6 c6 8 20 1 Y 32768 0 63 +def test t9 t9 c7 c7 4 12 1 Y 32768 31 63 +def test t9 t9 c8 c8 5 22 1 Y 32768 31 63 +def test t9 t9 c9 c9 5 22 1 Y 32768 31 63 +def test t9 t9 c10 c10 5 22 1 Y 32768 31 63 +def test t9 t9 c11 c11 246 9 6 Y 0 4 63 +def test t9 t9 c12 c12 246 10 6 Y 0 4 63 +def test t9 t9 c13 c13 10 10 10 Y 128 0 63 +def test t9 t9 c14 c14 12 19 19 Y 128 0 63 +def test t9 t9 c15 c15 7 19 19 N 1249 0 63 +def test t9 t9 c16 c16 11 8 8 Y 128 0 63 +def test t9 t9 c17 c17 13 4 4 Y 32864 0 63 +def test t9 t9 c18 c18 1 4 1 Y 32768 0 63 +def test t9 t9 c19 c19 1 1 1 Y 32768 0 63 +def test t9 t9 c20 c20 254 1 1 Y 0 0 8 +def test t9 t9 c21 c21 254 10 10 Y 0 0 8 +def test t9 t9 c22 c22 253 30 30 Y 0 0 8 +def test t9 t9 c23 c23 252 255 8 Y 144 0 63 +def test t9 t9 c24 c24 252 255 8 Y 16 0 8 +def test t9 t9 c25 c25 252 65535 4 Y 144 0 63 +def test t9 t9 c26 c26 252 65535 4 Y 16 0 8 +def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63 +def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8 +def test t9 t9 c29 c29 252 4294967295 8 Y 144 0 63 +def test t9 t9 c30 c30 252 4294967295 8 Y 16 0 8 +def test t9 t9 c31 c31 254 5 3 Y 256 0 8 +def test t9 t9 c32 c32 254 24 7 Y 2048 0 8 +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +set @arg00='SELECT' ; +@arg00 a from t1 where a=1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a from t1 where a=1' at line 1 +prepare stmt1 from ' ? a from t1 where a=1 '; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a from t1 where a=1' at line 1 +set @arg00=1 ; +select @arg00, b from t1 where a=1 ; +@arg00 b +1 one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +1 one +set @arg00='lion' ; +select @arg00, b from t1 where a=1 ; +@arg00 b +lion one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +lion one +set @arg00=NULL ; +select @arg00, b from t1 where a=1 ; +@arg00 b +NULL one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +NULL one +set @arg00=1 ; +select b, a - @arg00 from t1 where a=1 ; +b a - @arg00 +one 0 +prepare stmt1 from ' select b, a - ? from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +b a - ? +one 0 +set @arg00=null ; +select @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select @arg00 + 1 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? + 1 as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select 1 + @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select 1 + ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +set @arg00='MySQL' ; +select substr(@arg00,1,2) from t1 where a=1 ; +substr(@arg00,1,2) +My +prepare stmt1 from ' select substr(?,1,2) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr(?,1,2) +My +set @arg00=3 ; +select substr('MySQL',@arg00,5) from t1 where a=1 ; +substr('MySQL',@arg00,5) +SQL +prepare stmt1 from ' select substr(''MySQL'',?,5) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',?,5) +SQL +select substr('MySQL',1,@arg00) from t1 where a=1 ; +substr('MySQL',1,@arg00) +MyS +prepare stmt1 from ' select substr(''MySQL'',1,?) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',1,?) +MyS +set @arg00='MySQL' ; +select a , concat(@arg00,b) from t1 order by a; +a concat(@arg00,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +prepare stmt1 from ' select a , concat(?,b) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(?,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +select a , concat(b,@arg00) from t1 order by a ; +a concat(b,@arg00) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +prepare stmt1 from ' select a , concat(b,?) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(b,?) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +set @arg00='MySQL' ; +select group_concat(@arg00,b order by a) from t1 +group by 'a' ; +group_concat(@arg00,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +prepare stmt1 from ' select group_concat(?,b order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(?,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +select group_concat(b,@arg00 order by a) from t1 +group by 'a' ; +group_concat(b,@arg00 order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +prepare stmt1 from ' select group_concat(b,? order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(b,? order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +set @arg00='first' ; +set @arg01='second' ; +set @arg02=NULL; +select @arg00, @arg01 from t1 where a=1 ; +@arg00 @arg01 +first second +prepare stmt1 from ' select ?, ? from t1 where a=1 ' ; +execute stmt1 using @arg00, @arg01 ; +? ? +first second +execute stmt1 using @arg02, @arg01 ; +? ? +NULL second +execute stmt1 using @arg00, @arg02 ; +? ? +first NULL +execute stmt1 using @arg02, @arg02 ; +? ? +NULL NULL +drop table if exists t5 ; +create table t5 (id1 int(11) not null default '0', +value2 varchar(100), value1 varchar(100)) ; +insert into t5 values (1,'hh','hh'),(2,'hh','hh'), +(1,'ii','ii'),(2,'ii','ii') ; +prepare stmt1 from ' select id1,value1 from t5 where id1=? or value1=? order by id1,value1 ' ; +set @arg00=1 ; +set @arg01='hh' ; +execute stmt1 using @arg00, @arg01 ; +id1 value1 +1 hh +1 ii +2 hh +drop table t5 ; +drop table if exists t5 ; +create table t5(session_id char(9) not null) ; +insert into t5 values ('abc') ; +prepare stmt1 from ' select * from t5 +where ?=''1111'' and session_id = ''abc'' ' ; +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +set @arg00='1111' ; +execute stmt1 using @arg00 ; +session_id +abc +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +drop table t5 ; +set @arg00='FROM' ; +select a @arg00 t1 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 t1 where a=1' at line 1 +prepare stmt1 from ' select a ? t1 where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? t1 where a=1' at line 1 +set @arg00='t1' ; +select a from @arg00 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 where a=1' at line 1 +prepare stmt1 from ' select a from ? where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? where a=1' at line 1 +set @arg00='WHERE' ; +select a from t1 @arg00 a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a=1' at line 1 +prepare stmt1 from ' select a from t1 ? a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a=1' at line 1 +set @arg00=1 ; +select a FROM t1 where a=@arg00 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +1 +set @arg00=1000 ; +execute stmt1 using @arg00 ; +a +set @arg00=NULL ; +select a FROM t1 where a=@arg00 ; +a +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +set @arg00=4 ; +select a FROM t1 where a=sqrt(@arg00) ; +a +2 +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +2 +set @arg00=NULL ; +select a FROM t1 where a=sqrt(@arg00) ; +a +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +set @arg00=2 ; +set @arg01=3 ; +select a FROM t1 where a in (@arg00,@arg01) order by a; +a +2 +3 +prepare stmt1 from ' select a FROM t1 where a in (?,?) order by a '; +execute stmt1 using @arg00, @arg01; +a +2 +3 +set @arg00= 'one' ; +set @arg01= 'two' ; +set @arg02= 'five' ; +prepare stmt1 from ' select b FROM t1 where b in (?,?,?) order by b ' ; +execute stmt1 using @arg00, @arg01, @arg02 ; +b +one +two +prepare stmt1 from ' select b FROM t1 where b like ? '; +set @arg00='two' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='tw%' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='%wo' ; +execute stmt1 using @arg00 ; +b +two +set @arg00=null ; +insert into t9 set c1= 0, c5 = NULL ; +select c5 from t9 where c5 > NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 > ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 < NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 < ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 = NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 = ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 <=> NULL ; +c5 +NULL +prepare stmt1 from ' select c5 from t9 where c5 <=> ? '; +execute stmt1 using @arg00 ; +c5 +NULL +delete from t9 where c1= 0 ; +set @arg00='>' ; +select a FROM t1 where a @arg00 1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 1' at line 1 +prepare stmt1 from ' select a FROM t1 where a ? 1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? 1' at line 1 +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00='two' ; +select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> @arg00 order by a ; +a b +1 one +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> ? order by a ' ; +execute stmt1 using @arg00 ; +a b +1 one +3 three +4 four +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00=2 ; +select a,b from t1 order by 2 ; +a b +4 four +1 one +3 three +2 two +prepare stmt1 from ' select a,b from t1 +order by ? '; +execute stmt1 using @arg00; +a b +4 four +1 one +3 three +2 two +set @arg00=1 ; +execute stmt1 using @arg00; +a b +1 one +2 two +3 three +4 four +set @arg00=0 ; +execute stmt1 using @arg00; +ERROR 42S22: Unknown column '?' in 'order clause' +set @arg00=1; +prepare stmt1 from ' select a,b from t1 order by a +limit 1 '; +execute stmt1 ; +a b +1 one +prepare stmt1 from ' select a,b from t1 order by a limit ? '; +execute stmt1 using @arg00; +a b +1 one +set @arg00='b' ; +set @arg01=0 ; +set @arg02=2 ; +set @arg03=2 ; +select sum(a), @arg00 from t1 where a > @arg01 +and b is not null group by substr(b,@arg02) +having sum(a) <> @arg03 ; +sum(a) @arg00 +3 b +1 b +4 b +prepare stmt1 from ' select sum(a), ? from t1 where a > ? +and b is not null group by substr(b,?) +having sum(a) <> ? '; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +sum(a) ? +3 b +1 b +4 b +test_sequence +------ join tests ------ +select first.a as a1, second.a as a2 +from t1 first, t1 second +where first.a = second.a order by a1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +prepare stmt1 from ' select first.a as a1, second.a as a2 + from t1 first, t1 second + where first.a = second.a order by a1 '; +execute stmt1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +set @arg00='ABC'; +set @arg01='two'; +set @arg02='one'; +select first.a, @arg00, second.a FROM t1 first, t1 second +where @arg01 = first.b or first.a = second.a or second.b = @arg02 +order by second.a, first.a; +a @arg00 a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +prepare stmt1 from ' select first.a, ?, second.a FROM t1 first, t1 second + where ? = first.b or first.a = second.a or second.b = ? + order by second.a, first.a'; +execute stmt1 using @arg00, @arg01, @arg02; +a ? a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +drop table if exists t2 ; +create table t2 as select * from t1 ; +set @query1= 'SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a ' ; +set @query2= 'SELECT * FROM t2 natural join t1 order by t2.a ' ; +set @query3= 'SELECT * FROM t2 join t1 using(a) order by t2.a ' ; +set @query4= 'SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query5= 'SELECT * FROM t2 natural left join t1 order by t2.a ' ; +set @query6= 'SELECT * FROM t2 left join t1 using(a) order by t2.a ' ; +set @query7= 'SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query8= 'SELECT * FROM t2 natural right join t1 order by t2.a ' ; +set @query9= 'SELECT * FROM t2 right join t1 using(a) order by t2.a ' ; +the join statement is: +SELECT * FROM t2 right join t1 using(a) order by t2.a +prepare stmt1 from @query9 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural right join t1 order by t2.a +prepare stmt1 from @query8 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query7 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 left join t1 using(a) order by t2.a +prepare stmt1 from @query6 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural left join t1 order by t2.a +prepare stmt1 from @query5 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query4 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 join t1 using(a) order by t2.a +prepare stmt1 from @query3 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural join t1 order by t2.a +prepare stmt1 from @query2 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a +prepare stmt1 from @query1 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +drop table t2 ; +test_sequence +------ subquery tests ------ +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') '; +execute stmt1 ; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = 'two' ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') and b=? '; +execute stmt1 using @arg00; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = @arg00 ) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ? ) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=3 ; +set @arg01='three' ; +select a,b FROM t1 where (a,b) in (select 3, 'three'); +a b +3 three +select a FROM t1 where (a,b) in (select @arg00,@arg01); +a +3 +prepare stmt1 from ' select a FROM t1 where (a,b) in (select ?, ?) '; +execute stmt1 using @arg00, @arg01; +a +3 +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where b = @arg03 ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where b = ? ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +prepare stmt1 from 'select c4 FROM t9 where + c13 = (select MAX(b) from t1 where a = ?) and c22 = ? ' ; +execute stmt1 using @arg01, @arg02; +c4 +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b ) order by a '; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = outer_table.b ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b) and b=? '; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where a = @arg00 and b = outer_table.b) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where a = ? and b = outer_table.b) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where outer_table.a = @arg00 and a=2) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where outer_table.a = ? and a=2) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where outer_table.b = @arg03 +and outer_table.a=a ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where outer_table.b = ? + and outer_table.a=a ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +set @arg00=1 ; +set @arg01=0 ; +select a, @arg00 +from ( select a - @arg00 as a from t1 where a=@arg00 ) as t2 +where a=@arg01; +a @arg00 +0 1 +prepare stmt1 from ' select a, ? + from ( select a - ? as a from t1 where a=? ) as t2 + where a=? '; +execute stmt1 using @arg00, @arg00, @arg00, @arg01 ; +a ? +0 1 +drop table if exists t2 ; +create table t2 as select * from t1; +prepare stmt1 from ' select a in (select a from t2) from t1 ' ; +execute stmt1 ; +a in (select a from t2) +1 +1 +1 +1 +drop table if exists t5, t6, t7 ; +create table t5 (a int , b int) ; +create table t6 like t5 ; +create table t7 like t5 ; +insert into t5 values (0, 100), (1, 2), (1, 3), (2, 2), (2, 7), +(2, -1), (3, 10) ; +insert into t6 values (0, 0), (1, 1), (2, 1), (3, 1), (4, 1) ; +insert into t7 values (3, 3), (2, 2), (1, 1) ; +prepare stmt1 from ' select a, (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) from t7 ' ; +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +drop table t5, t6, t7 ; +drop table if exists t2 ; +create table t2 as select * from t9; +set @stmt= ' SELECT + (SELECT SUM(c1 + c12 + 0.0) FROM t2 + where (t9.c2 - 0e-3) = t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select 1.0e+0 from t2 + where t2.c3 * 9.0000000000 = t9.c4) as exists_s, + c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, + (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x = c25 ' ; +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= ' SELECT + (SELECT SUM(c1+c12+?) FROM t2 where (t9.c2-?)=t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select ? from t2 + where t2.c3*?=t9.c4) as exists_s, + c5*? in (select c6+? from t2) as in_s, + (c7-?, c8-?) in (select c9+?, c10+? from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x =c25 ' ; +set @arg00= 0.0 ; +set @arg01= 0e-3 ; +set @arg02= 1.0e+0 ; +set @arg03= 9.0000000000 ; +set @arg04= 4 ; +set @arg05= 0.3e+1 ; +set @arg06= 4 ; +set @arg07= 4 ; +set @arg08= 4.0 ; +set @arg09= 40e-1 ; +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +drop table t2 ; +select 1 < (select a from t1) ; +ERROR 21000: Subquery returns more than 1 row +prepare stmt1 from ' select 1 < (select a from t1) ' ; +execute stmt1 ; +ERROR 21000: Subquery returns more than 1 row +select 1 as my_col ; +my_col +1 +test_sequence +------ union tests ------ +prepare stmt1 from ' select a FROM t1 where a=1 + union distinct + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +execute stmt1 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=1 + union all + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +1 +prepare stmt1 from ' SELECT 1, 2 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT 1, 2 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT * from t1 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT * from t1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +set @arg00=1 ; +select @arg00 FROM t1 where a=1 +union distinct +select 1 FROM t1 where a=1; +@arg00 +1 +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select 1 FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +? +1 +set @arg00=1 ; +select 1 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +1 +1 +prepare stmt1 from ' select 1 FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +1 +1 +set @arg00='a' ; +select @arg00 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 '; +execute stmt1 using @arg00, @arg00; +? +a +prepare stmt1 from ' select ? + union distinct + select ? '; +execute stmt1 using @arg00, @arg00; +? +a +set @arg00='a' ; +set @arg01=1 ; +set @arg02='a' ; +set @arg03=2 ; +select @arg00 FROM t1 where a=@arg01 +union distinct +select @arg02 FROM t1 where a=@arg03; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=? + union distinct + select ? FROM t1 where a=? ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +? +a +set @arg00=1 ; +prepare stmt1 from ' select sum(a) + 200, ? from t1 +union distinct +select sum(a) + 200, 1 from t1 +group by b ' ; +execute stmt1 using @arg00; +sum(a) + 200 ? +210 1 +204 1 +201 1 +203 1 +202 1 +set @Oporto='Oporto' ; +set @Lisboa='Lisboa' ; +set @0=0 ; +set @1=1 ; +set @2=2 ; +set @3=3 ; +set @4=4 ; +select @Oporto,@Lisboa,@0,@1,@2,@3,@4 ; +@Oporto @Lisboa @0 @1 @2 @3 @4 +Oporto Lisboa 0 1 2 3 4 +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +group by b ; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + group by b + union distinct + select sum(a) + 200, ? from t1 + group by b ' ; +execute stmt1 using @Oporto, @Lisboa; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b ; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b ' ; +execute stmt1 using @Oporto, @1, @Lisboa, @2; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +having avg(a) > @2 +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b +having avg(a) > @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + having avg(a) > ? + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b + having avg(a) > ? '; +execute stmt1 using @Oporto, @1, @2, @Lisboa, @2, @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +test_sequence +------ explain select tests ------ +prepare stmt1 from ' explain select * from t9 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def id 8 3 1 N 32929 0 63 +def select_type 253 19 6 N 1 31 8 +def table 253 64 2 Y 0 31 8 +def type 253 10 3 Y 0 31 8 +def possible_keys 253 4096 0 Y 0 31 8 +def key 253 64 0 Y 0 31 8 +def key_len 253 4096 0 Y 128 31 63 +def ref 253 1024 0 Y 0 31 8 +def rows 8 10 1 Y 32928 0 63 +def Extra 253 255 0 N 1 31 8 +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t9 ALL NULL NULL NULL NULL 2 +drop table if exists t2 ; +create table t2 (s varchar(25), fulltext(s)) +ENGINE = 'MARIA' ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; +prepare stmt1 from ' select s from t2 where match (s) against (?) ' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +s +Hollow Dogs +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +s +Gravedigger +drop table t2 ; +test_sequence +------ delete tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'delete from t1 where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +execute stmt1; +insert into t1 values(0,NULL); +set @arg00=NULL; +prepare stmt1 from 'delete from t1 where b=?' ; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL ; +a b +0 NULL +set @arg00='one'; +execute stmt1 using @arg00; +select a,b from t1 where b=@arg00; +a b +prepare stmt1 from 'truncate table t1' ; +test_sequence +------ update tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +set @arg00=NULL; +prepare stmt1 from 'update t1 set b=? where a=2' ; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 NULL +set @arg00='two'; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 two +set @arg00=2; +prepare stmt1 from 'update t1 set b=NULL where a=?' ; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +2 NULL +update t1 set b='two' where a=@arg00; +set @arg00=2000; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +set @arg00=2; +set @arg01=22; +prepare stmt1 from 'update t1 set a=? where a=?' ; +execute stmt1 using @arg00, @arg00; +select a,b from t1 where a=@arg00; +a b +2 two +execute stmt1 using @arg01, @arg00; +select a,b from t1 where a=@arg01; +a b +22 two +execute stmt1 using @arg00, @arg01; +select a,b from t1 where a=@arg00; +a b +2 two +set @arg00=NULL; +set @arg01=2; +execute stmt1 using @arg00, @arg01; +Warnings: +Warning 1048 Column 'a' cannot be null +select a,b from t1 order by a; +a b +0 two +1 one +3 three +4 four +set @arg00=0; +execute stmt1 using @arg01, @arg00; +select a,b from t1 order by a; +a b +1 one +2 two +3 three +4 four +set @arg00=23; +set @arg01='two'; +set @arg02=2; +set @arg03='two'; +set @arg04=2; +drop table if exists t2; +create table t2 as select a,b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +create table t2 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +insert into t2(a,b) select a, b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +set @arg00=1; +prepare stmt1 from 'update t1 set b=''bla'' +where a=2 +limit 1'; +execute stmt1 ; +select a,b from t1 where b = 'bla' ; +a b +2 bla +prepare stmt1 from 'update t1 set b=''bla'' where a=2 limit ?'; +execute stmt1 using @arg00; +test_sequence +------ insert tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'insert into t1 values(5, ''five'' )'; +execute stmt1; +select a,b from t1 where a = 5; +a b +5 five +set @arg00='six' ; +prepare stmt1 from 'insert into t1 values(6, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b = @arg00; +a b +6 six +execute stmt1 using @arg00; +ERROR 23000: Duplicate entry '6' for key 'PRIMARY' +set @arg00=NULL ; +prepare stmt1 from 'insert into t1 values(0, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL; +a b +0 NULL +set @arg00=8 ; +set @arg01='eight' ; +prepare stmt1 from 'insert into t1 values(?, ? )'; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where b = @arg01; +a b +8 eight +set @NULL= null ; +set @arg00= 'abc' ; +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg00 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg00 ; +select * from t1 where a > 10000 order by a ; +a b +10001 abc +10002 abc +delete from t1 where a > 10000 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @NULL ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @NULL ; +select * from t1 where a > 10000 order by a ; +a b +10001 NULL +10002 NULL +delete from t1 where a > 10000 ; +set @arg01= 10000 + 10 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 9 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 8 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 7 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 6 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 5 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 4 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 3 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg01 ; +select * from t1 where a > 10000 order by a ; +a b +10001 10001 +10002 10002 +10003 10003 +10004 10004 +10005 10005 +10006 10006 +10007 10007 +10008 10008 +10009 10009 +10010 10010 +delete from t1 where a > 10000 ; +set @arg00=81 ; +set @arg01='8-1' ; +set @arg02=82 ; +set @arg03='8-2' ; +prepare stmt1 from 'insert into t1 values(?,?),(?,?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +select a,b from t1 where a in (@arg00,@arg02) ; +a b +81 8-1 +82 8-2 +set @arg00=9 ; +set @arg01='nine' ; +prepare stmt1 from 'insert into t1 set a=?, b=? '; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where a = @arg00 ; +a b +9 nine +set @arg00=6 ; +set @arg01=1 ; +prepare stmt1 from 'insert into t1 set a=?, b=''sechs'' + on duplicate key update a=a + ?, b=concat(b,''modified'') '; +execute stmt1 using @arg00, @arg01; +select * from t1 order by a; +a b +0 NULL +1 one +2 two +3 three +4 four +5 five +7 sixmodified +8 eight +9 nine +81 8-1 +82 8-2 +set @arg00=81 ; +set @arg01=1 ; +execute stmt1 using @arg00, @arg01; +ERROR 23000: Duplicate entry '82' for key 'PRIMARY' +drop table if exists t2 ; +create table t2 (id int auto_increment primary key) +ENGINE= 'MARIA' ; +prepare stmt1 from ' select last_insert_id() ' ; +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +1 +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +2 +drop table t2 ; +set @1000=1000 ; +set @x1000_2="x1000_2" ; +set @x1000_3="x1000_3" ; +set @x1000="x1000" ; +set @1100=1100 ; +set @x1100="x1100" ; +set @100=100 ; +set @updated="updated" ; +insert into t1 values(1000,'x1000_1') ; +insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3) +on duplicate key update a = a + @100, b = concat(b,@updated) ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +prepare stmt1 from ' insert into t1 values(?,?),(?,?) + on duplicate key update a = a + ?, b = concat(b,?) '; +execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1200 x1000_1updatedupdated +delete from t1 where a >= 1000 ; +prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' '; +execute stmt1; +execute stmt1; +execute stmt1; +test_sequence +------ multi table tests ------ +delete from t1 ; +delete from t9 ; +insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ; +insert into t9 (c1,c21) +values (1, 'one'), (2, 'two'), (3, 'three') ; +prepare stmt_delete from " delete t1, t9 + from t1, t9 where t1.a=t9.c1 and t1.b='updated' "; +prepare stmt_update from " update t1, t9 + set t1.b='updated', t9.c21='updated' + where t1.a=t9.c1 and t1.a=? "; +prepare stmt_select1 from " select a, b from t1 order by a" ; +prepare stmt_select2 from " select c1, c21 from t9 order by c1" ; +set @arg00= 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +2 two +3 three +execute stmt_select2 ; +c1 c21 +2 two +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +3 three +execute stmt_select2 ; +c1 c21 +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +execute stmt_select2 ; +c1 c21 +set @arg00= @arg00 + 1 ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t1 values(0,NULL) ; +set @duplicate='duplicate ' ; +set @1000=1000 ; +set @5=5 ; +select a,b from t1 where a < 5 order by a ; +a b +0 NULL +1 one +2 two +3 three +4 four +insert into t1 select a + @1000, concat(@duplicate,b) from t1 +where a < @5 ; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +prepare stmt1 from ' insert into t1 select a + ?, concat(?,b) from t1 +where a < ? ' ; +execute stmt1 using @1000, @duplicate, @5; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +set @1=1 ; +set @2=2 ; +set @100=100 ; +set @float=1.00; +set @five='five' ; +drop table if exists t2; +create table t2 like t1 ; +insert into t2 (b,a) +select @duplicate, sum(first.a) from t1 first, t1 second +where first.a <> @5 and second.b = first.b +and second.b <> @five +group by second.b +having sum(second.a) > @2 +union +select b, a + @100 from t1 +where (a,b) in ( select sqrt(a+@1)+CAST(@float AS signed),b +from t1); +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +delete from t2 ; +prepare stmt1 from ' insert into t2 (b,a) +select ?, sum(first.a) + from t1 first, t1 second + where first.a <> ? and second.b = first.b and second.b <> ? + group by second.b + having sum(second.a) > ? +union +select b, a + ? from t1 + where (a,b) in ( select sqrt(a+?)+CAST(? AS signed),b + from t1 ) ' ; +execute stmt1 using @duplicate, @5, @five, @2, @100, @1, @float ; +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +drop table t2; +drop table if exists t5 ; +set @arg01= 8; +set @arg02= 8.0; +set @arg03= 80.00000000000e-1; +set @arg04= 'abc' ; +set @arg05= CAST('abc' as binary) ; +set @arg06= '1991-08-05' ; +set @arg07= CAST('1991-08-05' as date); +set @arg08= '1991-08-05 01:01:01' ; +set @arg09= CAST('1991-08-05 01:01:01' as datetime) ; +set @arg10= unix_timestamp('1991-01-01 01:01:01'); +set @arg11= YEAR('1991-01-01 01:01:01'); +set @arg12= 8 ; +set @arg12= NULL ; +set @arg13= 8.0 ; +set @arg13= NULL ; +set @arg14= 'abc'; +set @arg14= NULL ; +set @arg15= CAST('abc' as binary) ; +set @arg15= NULL ; +create table t5 as select +8 as const01, @arg01 as param01, +8.0 as const02, @arg02 as param02, +80.00000000000e-1 as const03, @arg03 as param03, +'abc' as const04, @arg04 as param04, +CAST('abc' as binary) as const05, @arg05 as param05, +'1991-08-05' as const06, @arg06 as param06, +CAST('1991-08-05' as date) as const07, @arg07 as param07, +'1991-08-05 01:01:01' as const08, @arg08 as param08, +CAST('1991-08-05 01:01:01' as datetime) as const09, @arg09 as param09, +unix_timestamp('1991-01-01 01:01:01') as const10, @arg10 as param10, +YEAR('1991-01-01 01:01:01') as const11, @arg11 as param11, +NULL as const12, @arg12 as param12, +@arg13 as param13, +@arg14 as param14, +@arg15 as param15; +show create table t5 ; +Table Create Table +t5 CREATE TABLE `t5` ( + `const01` bigint(1) NOT NULL DEFAULT '0', + `param01` bigint(20) DEFAULT NULL, + `const02` decimal(2,1) unsigned NOT NULL DEFAULT '0.0', + `param02` decimal(65,30) DEFAULT NULL, + `const03` double NOT NULL DEFAULT '0', + `param03` double DEFAULT NULL, + `const04` varchar(3) NOT NULL DEFAULT '', + `param04` longtext, + `const05` varbinary(3) NOT NULL DEFAULT '', + `param05` longblob, + `const06` varchar(10) NOT NULL DEFAULT '', + `param06` longtext, + `const07` date DEFAULT NULL, + `param07` longblob, + `const08` varchar(19) NOT NULL DEFAULT '', + `param08` longtext, + `const09` datetime DEFAULT NULL, + `param09` longblob, + `const10` int(10) NOT NULL DEFAULT '0', + `param10` bigint(20) DEFAULT NULL, + `const11` int(4) DEFAULT NULL, + `param11` bigint(20) DEFAULT NULL, + `const12` binary(0) DEFAULT NULL, + `param12` bigint(20) DEFAULT NULL, + `param13` decimal(65,30) DEFAULT NULL, + `param14` longtext, + `param15` longblob +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t5 ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t5 t5 const01 const01 8 1 1 N 32769 0 63 +def test t5 t5 param01 param01 8 20 1 Y 32768 0 63 +def test t5 t5 const02 const02 246 3 3 N 33 1 63 +def test t5 t5 param02 param02 246 67 32 Y 0 30 63 +def test t5 t5 const03 const03 5 17 1 N 32769 31 63 +def test t5 t5 param03 param03 5 23 1 Y 32768 31 63 +def test t5 t5 const04 const04 253 3 3 N 1 0 8 +def test t5 t5 param04 param04 252 4294967295 3 Y 16 0 8 +def test t5 t5 const05 const05 253 3 3 N 129 0 63 +def test t5 t5 param05 param05 252 4294967295 3 Y 144 0 63 +def test t5 t5 const06 const06 253 10 10 N 1 0 8 +def test t5 t5 param06 param06 252 4294967295 10 Y 16 0 8 +def test t5 t5 const07 const07 10 10 10 Y 128 0 63 +def test t5 t5 param07 param07 252 4294967295 10 Y 144 0 63 +def test t5 t5 const08 const08 253 19 19 N 1 0 8 +def test t5 t5 param08 param08 252 4294967295 19 Y 16 0 8 +def test t5 t5 const09 const09 12 19 19 Y 128 0 63 +def test t5 t5 param09 param09 252 4294967295 19 Y 144 0 63 +def test t5 t5 const10 const10 3 10 9 N 32769 0 63 +def test t5 t5 param10 param10 8 20 9 Y 32768 0 63 +def test t5 t5 const11 const11 3 4 4 Y 32768 0 63 +def test t5 t5 param11 param11 8 20 4 Y 32768 0 63 +def test t5 t5 const12 const12 254 0 0 Y 128 0 63 +def test t5 t5 param12 param12 8 20 0 Y 32768 0 63 +def test t5 t5 param13 param13 246 67 0 Y 0 30 63 +def test t5 t5 param14 param14 252 4294967295 0 Y 16 0 8 +def test t5 t5 param15 param15 252 4294967295 0 Y 144 0 63 +const01 8 +param01 8 +const02 8.0 +param02 8.000000000000000000000000000000 +const03 8 +param03 8 +const04 abc +param04 abc +const05 abc +param05 abc +const06 1991-08-05 +param06 1991-08-05 +const07 1991-08-05 +param07 1991-08-05 +const08 1991-08-05 01:01:01 +param08 1991-08-05 01:01:01 +const09 1991-08-05 01:01:01 +param09 1991-08-05 01:01:01 +const10 662680861 +param10 662680861 +const11 1991 +param11 1991 +const12 NULL +param12 NULL +param13 NULL +param14 NULL +param15 NULL +drop table t5 ; +test_sequence +------ data type conversion tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t9 set c1= 0, c15= '1991-01-01 01:01:01' ; +select * from t9 order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +test_sequence +------ select @parameter:= column ------ +prepare full_info from "select @arg01, @arg02, @arg03, @arg04, + @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, + @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, + @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, + @arg29, @arg30, @arg31, @arg32" ; +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 1 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 8192 10 Y 128 31 63 +def @arg14 253 8192 19 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 8192 1 Y 0 31 8 +def @arg21 253 8192 10 Y 0 31 8 +def @arg22 253 8192 30 Y 0 31 8 +def @arg23 253 8192 8 Y 128 31 63 +def @arg24 253 8192 8 Y 0 31 8 +def @arg25 253 8192 4 Y 128 31 63 +def @arg26 253 8192 4 Y 0 31 8 +def @arg27 253 8192 10 Y 128 31 63 +def @arg28 253 8192 10 Y 0 31 8 +def @arg29 253 8192 8 Y 128 31 63 +def @arg30 253 8192 8 Y 0 31 8 +def @arg31 253 8192 3 Y 0 31 8 +def @arg32 253 8192 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 0 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 8192 0 Y 128 31 63 +def @arg14 253 8192 0 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 8192 0 Y 0 31 8 +def @arg21 253 8192 0 Y 0 31 8 +def @arg22 253 8192 0 Y 0 31 8 +def @arg23 253 8192 0 Y 128 31 63 +def @arg24 253 8192 0 Y 0 31 8 +def @arg25 253 8192 0 Y 128 31 63 +def @arg26 253 8192 0 Y 0 31 8 +def @arg27 253 8192 0 Y 128 31 63 +def @arg28 253 8192 0 Y 0 31 8 +def @arg29 253 8192 0 Y 128 31 63 +def @arg30 253 8192 0 Y 0 31 8 +def @arg31 253 8192 0 Y 0 31 8 +def @arg32 253 8192 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select + @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, + @arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, + @arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, + @arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, + @arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, + @arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, + @arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, + @arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= ?" ; +set @my_key= 1 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 8192 10 Y 128 31 63 +def @arg14 253 8192 19 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 8192 1 Y 0 31 8 +def @arg21 253 8192 10 Y 0 31 8 +def @arg22 253 8192 30 Y 0 31 8 +def @arg23 253 8192 8 Y 128 31 63 +def @arg24 253 8192 8 Y 0 31 8 +def @arg25 253 8192 4 Y 128 31 63 +def @arg26 253 8192 4 Y 0 31 8 +def @arg27 253 8192 10 Y 128 31 63 +def @arg28 253 8192 10 Y 0 31 8 +def @arg29 253 8192 8 Y 128 31 63 +def @arg30 253 8192 8 Y 0 31 8 +def @arg31 253 8192 3 Y 0 31 8 +def @arg32 253 8192 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 8192 0 Y 128 31 63 +def @arg14 253 8192 0 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 8192 0 Y 0 31 8 +def @arg21 253 8192 0 Y 0 31 8 +def @arg22 253 8192 0 Y 0 31 8 +def @arg23 253 8192 0 Y 128 31 63 +def @arg24 253 8192 0 Y 0 31 8 +def @arg25 253 8192 0 Y 128 31 63 +def @arg26 253 8192 0 Y 0 31 8 +def @arg27 253 8192 0 Y 128 31 63 +def @arg28 253 8192 0 Y 0 31 8 +def @arg29 253 8192 0 Y 128 31 63 +def @arg30 253 8192 0 Y 0 31 8 +def @arg31 253 8192 0 Y 0 31 8 +def @arg32 253 8192 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select ? := c1 from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ':= c1 from t9 where c1= 1' at line 1 +test_sequence +------ select column, .. into @parm,.. ------ +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 1 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 8192 10 Y 128 31 63 +def @arg14 253 8192 19 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 8192 1 Y 0 31 8 +def @arg21 253 8192 10 Y 0 31 8 +def @arg22 253 8192 30 Y 0 31 8 +def @arg23 253 8192 8 Y 128 31 63 +def @arg24 253 8192 8 Y 0 31 8 +def @arg25 253 8192 4 Y 128 31 63 +def @arg26 253 8192 4 Y 0 31 8 +def @arg27 253 8192 10 Y 128 31 63 +def @arg28 253 8192 10 Y 0 31 8 +def @arg29 253 8192 8 Y 128 31 63 +def @arg30 253 8192 8 Y 0 31 8 +def @arg31 253 8192 3 Y 0 31 8 +def @arg32 253 8192 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 0 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 8192 0 Y 128 31 63 +def @arg14 253 8192 0 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 8192 0 Y 0 31 8 +def @arg21 253 8192 0 Y 0 31 8 +def @arg22 253 8192 0 Y 0 31 8 +def @arg23 253 8192 0 Y 128 31 63 +def @arg24 253 8192 0 Y 0 31 8 +def @arg25 253 8192 0 Y 128 31 63 +def @arg26 253 8192 0 Y 0 31 8 +def @arg27 253 8192 0 Y 128 31 63 +def @arg28 253 8192 0 Y 0 31 8 +def @arg29 253 8192 0 Y 128 31 63 +def @arg30 253 8192 0 Y 0 31 8 +def @arg31 253 8192 0 Y 0 31 8 +def @arg32 253 8192 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, + c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, + c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= ?" ; +set @my_key= 1 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 8192 10 Y 128 31 63 +def @arg14 253 8192 19 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 8192 1 Y 0 31 8 +def @arg21 253 8192 10 Y 0 31 8 +def @arg22 253 8192 30 Y 0 31 8 +def @arg23 253 8192 8 Y 128 31 63 +def @arg24 253 8192 8 Y 0 31 8 +def @arg25 253 8192 4 Y 128 31 63 +def @arg26 253 8192 4 Y 0 31 8 +def @arg27 253 8192 10 Y 128 31 63 +def @arg28 253 8192 10 Y 0 31 8 +def @arg29 253 8192 8 Y 128 31 63 +def @arg30 253 8192 8 Y 0 31 8 +def @arg31 253 8192 3 Y 0 31 8 +def @arg32 253 8192 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 8192 0 Y 128 31 63 +def @arg14 253 8192 0 Y 128 31 63 +def @arg15 253 8192 19 Y 128 31 63 +def @arg16 253 8192 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 8192 0 Y 0 31 8 +def @arg21 253 8192 0 Y 0 31 8 +def @arg22 253 8192 0 Y 0 31 8 +def @arg23 253 8192 0 Y 128 31 63 +def @arg24 253 8192 0 Y 0 31 8 +def @arg25 253 8192 0 Y 128 31 63 +def @arg26 253 8192 0 Y 0 31 8 +def @arg27 253 8192 0 Y 128 31 63 +def @arg28 253 8192 0 Y 0 31 8 +def @arg29 253 8192 0 Y 128 31 63 +def @arg30 253 8192 0 Y 0 31 8 +def @arg31 253 8192 0 Y 0 31 8 +def @arg32 253 8192 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1 into ? from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? from t9 where c1= 1' at line 1 +test_sequence +-- insert into numeric columns -- +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 ) ; +set @arg00= 21 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 )" ; +execute stmt1 ; +set @arg00= 23; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, +30.0, 30.0, 30.0 ) ; +set @arg00= 31.0 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, + 32.0, 32.0, 32.0 )" ; +execute stmt1 ; +set @arg00= 33.0; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( '40', '40', '40', '40', '40', '40', '40', '40', +'40', '40', '40' ) ; +set @arg00= '41' ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( '42', '42', '42', '42', '42', '42', '42', '42', + '42', '42', '42' )" ; +execute stmt1 ; +set @arg00= '43'; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary) ) ; +set @arg00= CAST('51' as binary) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary) )" ; +execute stmt1 ; +set @arg00= CAST('53' as binary) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 2 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +NULL, NULL, NULL ) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 61, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 62, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL )" ; +execute stmt1 ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 63, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 8.0 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 71, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 73, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 'abc' ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 81, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 83, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c1 >= 20 +order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c12 +20 20 20 20 20 20 20 20 20 20 20.0000 +21 21 21 21 21 21 21 21 21 21 21.0000 +22 22 22 22 22 22 22 22 22 22 22.0000 +23 23 23 23 23 23 23 23 23 23 23.0000 +30 30 30 30 30 30 30 30 30 30 30.0000 +31 31 31 31 31 31 31 31 31 31 31.0000 +32 32 32 32 32 32 32 32 32 32 32.0000 +33 33 33 33 33 33 33 33 33 33 33.0000 +40 40 40 40 40 40 40 40 40 40 40.0000 +41 41 41 41 41 41 41 41 41 41 41.0000 +42 42 42 42 42 42 42 42 42 42 42.0000 +43 43 43 43 43 43 43 43 43 43 43.0000 +50 50 50 50 50 50 50 50 50 50 50.0000 +51 51 51 51 51 51 51 51 51 51 51.0000 +52 52 52 52 52 52 52 52 52 52 52.0000 +53 53 53 53 53 53 53 53 53 53 53.0000 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where numeric column = .. -- +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 +and c8= 20 and c9= 20 and c10= 20 and c12= 20; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 + and c8= 20 and c9= 20 and c10= 20 and c12= 20 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 +and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 + and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20'; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20' "; +execute stmt1 ; +found +true +set @arg00= '20'; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and +c3= CAST('20' as binary) and c4= CAST('20' as binary) and +c5= CAST('20' as binary) and c6= CAST('20' as binary) and +c7= CAST('20' as binary) and c8= CAST('20' as binary) and +c9= CAST('20' as binary) and c10= CAST('20' as binary) and +c12= CAST('20' as binary); +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and + c3= CAST('20' as binary) and c4= CAST('20' as binary) and + c5= CAST('20' as binary) and c6= CAST('20' as binary) and + c7= CAST('20' as binary) and c8= CAST('20' as binary) and + c9= CAST('20' as binary) and c10= CAST('20' as binary) and + c12= CAST('20' as binary) "; +execute stmt1 ; +found +true +set @arg00= CAST('20' as binary) ; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- some numeric overflow experiments -- +prepare my_insert from "insert into t9 + ( c21, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 'O', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +prepare my_select from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c21 = 'O' "; +prepare my_delete from "delete from t9 where c21 = 'O' "; +set @arg00= 9223372036854775807 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= '9223372036854775807' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= -9223372036854775808 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-9223372036854775808' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= 1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= '1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1265 Data truncated for column 'c1' at row 1 +Warning 1265 Data truncated for column 'c2' at row 1 +Warning 1265 Data truncated for column 'c3' at row 1 +Warning 1265 Data truncated for column 'c4' at row 1 +Warning 1265 Data truncated for column 'c5' at row 1 +Warning 1265 Data truncated for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 1 +c2 1 +c3 1 +c4 1 +c5 1 +c6 1 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= -1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1265 Data truncated for column 'c1' at row 1 +Warning 1265 Data truncated for column 'c2' at row 1 +Warning 1265 Data truncated for column 'c3' at row 1 +Warning 1265 Data truncated for column 'c4' at row 1 +Warning 1265 Data truncated for column 'c5' at row 1 +Warning 1265 Data truncated for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -1 +c2 -1 +c3 -1 +c4 -1 +c5 -1 +c6 -1 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +test_sequence +-- insert into string columns -- +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +select c1, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30 +from t9 where c1 >= 20 +order by c1 ; +c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 +20 2 20 20 20 20 20 20 20 20 20 20 +21 2 21 21 21 21 21 21 21 21 21 21 +22 2 22 22 22 22 22 22 22 22 22 22 +23 2 23 23 23 23 23 23 23 23 23 23 +30 3 30 30 30 30 30 30 30 30 30 30 +31 3 31 31 31 31 31 31 31 31 31 31 +32 3 32 32 32 32 32 32 32 32 32 32 +33 3 33 33 33 33 33 33 33 33 33 33 +40 4 40 40 40 40 40 40 40 40 40 40 +41 4 41 41 41 41 41 41 41 41 41 41 +42 4 42 42 42 42 42 42 42 42 42 42 +43 4 43 43 43 43 43 43 43 43 43 43 +50 5 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 +51 5 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 +52 5 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 +53 5 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 +54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00 +55 5 55 55 55 55 55 55 55 55 55 55 +56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00 +57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where string column = .. -- +set @arg00= '20'; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and +c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and +c27= '20' and c28= '20' and c29= '20' and c30= '20' ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and + c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and + c27= '20' and c28= '20' and c29= '20' and c30= '20'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('20' as binary); +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) += CAST('20' as binary) and c21= CAST('20' as binary) +and c22= CAST('20' as binary) and c23= CAST('20' as binary) and +c24= CAST('20' as binary) and c25= CAST('20' as binary) and +c26= CAST('20' as binary) and c27= CAST('20' as binary) and +c28= CAST('20' as binary) and c29= CAST('20' as binary) and +c30= CAST('20' as binary) ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20))) = @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and +c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) + = CAST('20' as binary) and c21= CAST('20' as binary) + and c22= CAST('20' as binary) and c23= CAST('20' as binary) and + c24= CAST('20' as binary) and c25= CAST('20' as binary) and + c26= CAST('20' as binary) and c27= CAST('20' as binary) and + c28= CAST('20' as binary) and c29= CAST('20' as binary) and + c30= CAST('20' as binary)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20))) = ? and c21= ? and + c22= ? and c23= ? and c25= ? and c26= ? and c27= ? and c28= ? and + c29= ? and c30= ?"; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and +c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and +c27= 20 and c28= 20 and c29= 20 and c30= 20 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and + c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and + c27= 20 and c28= 20 and c29= 20 and c30= 20" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and +c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and +c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and + c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and + c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- insert into date/time columns -- +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +select c1, c13, c14, c15, c16, c17 from t9 order by c1 ; +c1 c13 c14 c15 c16 c17 +20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +21 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +22 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +23 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +30 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +31 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +32 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +33 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +40 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +41 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +42 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +43 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +50 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +51 2010-00-00 2010-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +52 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +53 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +60 NULL NULL 1991-01-01 01:01:01 NULL NULL +61 NULL NULL 1991-01-01 01:01:01 NULL NULL +62 NULL NULL 1991-01-01 01:01:01 NULL NULL +63 NULL NULL 1991-01-01 01:01:01 NULL NULL +71 NULL NULL 1991-01-01 01:01:01 NULL NULL +73 NULL NULL 1991-01-01 01:01:01 NULL NULL +81 NULL NULL 1991-01-01 01:01:01 NULL NULL +83 NULL NULL 1991-01-01 01:01:01 NULL NULL +test_sequence +-- select .. where date/time column = .. -- +set @arg00= '1991-01-01 01:01:01' ; +select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and +c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and +c17= '1991-01-01 01:01:01' ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and + c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and + c17= '1991-01-01 01:01:01'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('1991-01-01 01:01:01' as datetime) ; +select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and +c14= CAST('1991-01-01 01:01:01' as datetime) and +c15= CAST('1991-01-01 01:01:01' as datetime) and +c16= CAST('1991-01-01 01:01:01' as datetime) and +c17= CAST('1991-01-01 01:01:01' as datetime) ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and + c14= CAST('1991-01-01 01:01:01' as datetime) and + c15= CAST('1991-01-01 01:01:01' as datetime) and + c16= CAST('1991-01-01 01:01:01' as datetime) and + c17= CAST('1991-01-01 01:01:01' as datetime)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 1991 ; +select 'true' as found from t9 +where c1= 20 and c17= 1991 ; +found +true +select 'true' as found from t9 +where c1= 20 and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= 1991" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= ?" ; +execute stmt1 using @arg00 ; +found +true +set @arg00= 1.991e+3 ; +select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01 ; +found +true +select 'true' as found from t9 +where c1= 20 and abs(c17 - @arg00) < 0.01 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - ?) < 0.01" ; +execute stmt1 using @arg00 ; +found +true +drop table t1, t9; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test new file mode 100644 index 00000000000..59d36206039 --- /dev/null +++ b/mysql-test/t/maria.test @@ -0,0 +1,802 @@ +# +# Test bugs in the MARIA code +# +-- source include/have_maria.inc + +# Initialise +--disable_warnings +drop table if exists t1,t2; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# Test problem with CHECK TABLE; +# + +CREATE TABLE t1 ( + STRING_DATA char(255) default NULL, + KEY string_data (STRING_DATA) +) ENGINE=MARIA; + +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +drop table t1; + +# +# Test problem with rows that are 65517-65520 bytes long +# + +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)) engine=maria; + +let $1=100; +disable_query_log; +--disable_warnings +SET SQL_WARNINGS=0; +while ($1) +{ + eval insert into t1 (b) values(repeat(char(65+$1),65550-$1)); + dec $1; +} +SET SQL_WARNINGS=1; +--enable_warnings +enable_query_log; +check table t1; +repair table t1; +delete from t1 where (a & 1); +check table t1; +repair table t1; +check table t1; +drop table t1; + +# +# Test bug: Two optimize in a row reset index cardinality +# + +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)) engine=maria; +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +show index from t1; +optimize table t1; +show index from t1; +drop table t1; + +# +# Test of how ORDER BY works when doing it on the whole table +# + +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=maria; +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select a from t1 order by a; +explain select b from t1 order by b; +explain select a,b from t1 order by b; +explain select a,b from t1; +explain select a,b,c from t1; +drop table t1; + +# +# Test of OPTIMIZE of locked and modified tables +# +CREATE TABLE t1 (a INT) engine=maria; +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +OPTIMIZE TABLE t1; +DROP TABLE t1; + +# +# Test of optimize, when only mi_sort_index (but not mi_repair*) is done +# in ha_maria::repair, and index size is changed (decreased). +# + +create table t1 ( t1 char(255), key(t1(250))) engine=maria; +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +check table t1; +drop table t1; + +# +# test of maria with huge number of packed fields +# + +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic engine=maria; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +delete from t1 where i8=1; +select i1,i2 from t1; +check table t1; +drop table t1; + +# +# Test of REPAIR that once failed +# +CREATE TABLE `t1` ( + `post_id` mediumint(8) unsigned NOT NULL auto_increment, + `topic_id` mediumint(8) unsigned NOT NULL default '0', + `post_time` datetime NOT NULL default '0000-00-00 00:00:00', + `post_text` text NOT NULL, + `icon_url` varchar(10) NOT NULL default '', + `sign` tinyint(1) unsigned NOT NULL default '0', + `post_edit` varchar(150) NOT NULL default '', + `poster_login` varchar(35) NOT NULL default '', + `ip` varchar(15) NOT NULL default '', + PRIMARY KEY (`post_id`), + KEY `post_time` (`post_time`), + KEY `ip` (`ip`), + KEY `poster_login` (`poster_login`), + KEY `topic_id` (`topic_id`), + FULLTEXT KEY `post_text` (`post_text`) +) ENGINE=MARIA; + +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); + +REPAIR TABLE t1; +CHECK TABLE t1; +drop table t1; + +# +# Test of creating table with too long key +# + +--error 1071 +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)) engine=maria; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)) engine=maria; +--error 1071 +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +DROP TABLE t1; + +# +# Test of cardinality of keys with NULL +# + +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)) engine=maria; +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)) engine=maria; +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +show index from t1; +explain select * from t1,t2 where t1.a=t2.a; +explain select * from t1,t2 force index(a) where t1.a=t2.a; +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +explain select * from t1,t2 where t1.b=t2.b; +explain select * from t1,t2 force index(c) where t1.a=t2.a; +explain select * from t1 where a=0 or a=2; +explain select * from t1 force index (a) where a=0 or a=2; +explain select * from t1 where c=1; +explain select * from t1 use index() where c=1; +drop table t1,t2; + +# +# Test bug when updating a split dynamic row where keys are not changed +# + +create table t1 (a int not null auto_increment primary key, b varchar(255)) engine=maria; +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; + +# +# check updating with keys +# + +disable_query_log; +let $1 = 100; +while ($1) +{ + eval insert into t1 (b) values (repeat(char(($1 & 32)+65), $1)); + dec $1; +} +enable_query_log; +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +drop table t1; + +# +# two bugs in maria-space-stripping feature +# +create table t1 ( a text not null, key a (a(20))) engine=maria; +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +repair table t1; +select concat(a,'.') from t1 where a='aaa'; +select concat(a,'.') from t1 where binary a='aaa'; +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +drop table t1; + +# +# Third bug in the same code (BUG#2295) +# + +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))) engine=maria; +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +drop table t1; + +# +# space-stripping in _mi_prefix_search: BUG#5284 +# +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)) engine=maria; +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +CHECK TABLE t1; +DROP TABLE t1; + +# +# Verify blob handling +# +create table t1 (a blob) engine=maria; +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +drop table t1; + +# +# Test text and unique +# +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))) engine=maria; +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +update t1 set b='b ' where a=2; +--error 1062 +update t1 set b='b ' where a > 1; +--error 1062 +insert into t1 (b) values ('b'); +select * from t1; +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +drop table t1; + +# +# Test keys with 0 segments. (Bug #3203) +# +create table t1 (a int not null) engine=maria; +create table t2 (a int not null, primary key (a)) engine=maria; +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +select distinct t1.a from t1,t2 order by t2.a; +select sql_big_result distinct t1.a from t1,t2; +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +explain select distinct t1.a from t1,t2 order by t2.a; +drop table t1,t2; + +# +# Bug#14616 - Freshly imported table returns error 124 when using LIMIT +# +create table t1 ( + c1 varchar(32), + key (c1) +) engine=maria; +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +drop table t1; + +# +# Test RTREE index +# +--error 1235, 1289 +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)) ENGINE=MARIA; +# INSERT INTO t1 VALUES (1,1),(1,1); +# DELETE FROM rt WHERE a<1; +# DROP TABLE IF EXISTS t1; + +create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=maria; +create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=maria; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +checksum table t1, t2, t3; +checksum table t1, t2, t3 extended; +#show table status; +drop table t1,t2; + +create table t1 (a int, key (a)) engine=maria; +show keys from t1; +alter table t1 disable keys; +show keys from t1; +create table t2 (a int) engine=maria; +let $i=1000; +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +--disable_query_log +while ($i) +{ + dec $i; + insert t2 values (rand()*100000); +} +--enable_query_log +insert t1 select * from t2; +show keys from t1; +alter table t1 enable keys; +show keys from t1; +alter table t1 engine=heap; +alter table t1 disable keys; +show keys from t1; +drop table t1,t2; + +# +# index search for NULL in blob. Bug #4816 +# +create table t1 ( a tinytext, b char(1), index idx (a(1),b)) engine=maria; +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +select count(*) from t1 where a is null; +drop table t1; + +# +# bug9188 - Corruption Can't open file: 'table.MYI' (errno: 145) +# +create table t1 (c1 int, c2 varchar(4) not null default '', + key(c2(3))) default charset=utf8 engine=maria; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +drop table t1; + + +# +# Bug#12296 - CHECKSUM TABLE reports 0 for the table +# This happened if the first record was marked as deleted. +# +create table t1 (c1 int) engine=maria; +insert into t1 values (1),(2),(3),(4); +checksum table t1; +delete from t1 where c1 = 1; +create table t2 engine=maria as select * from t1; +# The following returns 0 with the bug in place. +checksum table t1; +# The above should give the same number as the following. +checksum table t2; +drop table t1, t2; + +# +# BUG#12232: New maria_stats_method variable. +# + +show variables like 'maria_stats_method'; + +create table t1 (a int, key(a)) engine=maria; +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; + +# default: NULLs considered inequal +analyze table t1; +show index from t1; +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +show index from t1; + +# Set nulls to be equal: +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +# Set nulls back to be equal +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +drop table t1; + +# WL#2609, CSC#XXXX: MARIA +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; + +create table t1 ( + a char(3), b char(4), c char(5), d char(6), + key(a,b,c,d) +) engine=maria; +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +show index from t1; +delete from t1; +analyze table t1; +show index from t1; + +set maria_stats_method=DEFAULT; +drop table t1; + +# BUG#13814 - key value packed incorrectly for TINYBLOBs + +create table t1( + cip INT NOT NULL, + time TIME NOT NULL, + score INT NOT NULL DEFAULT 0, + bob TINYBLOB +) engine=maria; + +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), + (6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +drop table t1; + +# +# Bug#14980 - COUNT(*) incorrect on MARIA table with certain INDEX +# +create table t1 ( + id1 int not null auto_increment, + id2 int not null default '0', + t text not null, + primary key (id1), + key x (id2, t(32)) +) engine=maria; +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +select count(id1) from t1 where id2 = 10; +drop table t1; + +# End of 4.1 tests + +# +# Test varchar +# + +let $default=`select @@storage_engine`; +set storage_engine=MARIA; +source include/varchar.inc; + +# +# Some errors/warnings on create +# + +create table t1 (v varchar(65530), key(v)) engine=maria; +drop table if exists t1; +create table t1 (v varchar(65536)) engine=maria; +show create table t1; +drop table t1; +create table t1 (v varchar(65530) character set utf8) engine=maria; +show create table t1; +drop table t1; + +# MARIA specific varchar tests +--error 1118 +create table t1 (v varchar(65535)) engine=maria; + +eval set storage_engine=$default; + +# +# Test how DROP TABLE works if the index or data file doesn't exists + +create table t1 (a int) engine=maria; +system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAI ; +drop table if exists t1; +create table t1 (a int) engine=maria; +system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAI ; +--error 1051,6 +drop table t1; +create table t1 (a int) engine=maria; +system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAD ; +--error 1105,6,29 +drop table t1; +--error 1051 +drop table t1; + +# +# Test concurrent insert +# First with static record length +# +set @save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int) engine=maria; +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 values (10),(11),(12); +select * from t1; +check table t1; +drop table t1; +disconnect con1; + +# Same test with dynamic record length +create table t1 (a int, b varchar(30) default "hello") engine=maria; +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 (a) values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 (a) values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 (a) values (10),(11),(12); +select a from t1; +check table t1; +drop table t1; +disconnect con1; +set global concurrent_insert=@save_concurrent_insert; + + +# BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce +# different statistics on the same table with NULL values. +create table t1 (a int, key(a)) engine=maria; + +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +show keys from t1; + +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; + +drop table t1; + +# +# Bug#10056 - PACK_KEYS option take values greater than 1 while creating table +# +create table t1 (c1 int) engine=maria pack_keys=0; +create table t2 (c1 int) engine=maria pack_keys=1; +create table t3 (c1 int) engine=maria pack_keys=default; +--error 1064 +create table t4 (c1 int) engine=maria pack_keys=2; +drop table t1, t2, t3; + +# End of 5.2 tests diff --git a/mysql-test/t/ps_maria.test b/mysql-test/t/ps_maria.test new file mode 100644 index 00000000000..2be99842132 --- /dev/null +++ b/mysql-test/t/ps_maria.test @@ -0,0 +1,46 @@ +############################################### +# # +# Prepared Statements test on MARIA tables # +# # +############################################### + +# +# NOTE: PLEASE SEE ps_1general.test (bottom) +# BEFORE ADDING NEW TEST CASES HERE !!! + +use test; + +-- source include/have_maria.inc + +let $type= 'MARIA' ; +-- source include/ps_create.inc +-- source include/ps_renew.inc + +-- source include/ps_query.inc + +# parameter in SELECT ... MATCH/AGAINST +# case derived from client_test.c: test_bug1500() +--disable_warnings +drop table if exists t2 ; +--enable_warnings +eval create table t2 (s varchar(25), fulltext(s)) +ENGINE = $type ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; + +prepare stmt1 from ' select s from t2 where match (s) against (?) ' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +drop table t2 ; + +-- source include/ps_modify.inc +-- source include/ps_modify1.inc +-- source include/ps_conv.inc + +drop table t1, t9; + +# End of 4.1 tests diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 38fef31fdd4..fdac198697d 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -148,7 +148,8 @@ static void safe_hash_free(SAFE_HASH *hash) Return the value stored for a key or default value if no key */ -static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length) +static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def) { byte *result; DBUG_ENTER("safe_hash_search"); @@ -156,7 +157,7 @@ static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length) result= hash_search(&hash->hash, key, length); rw_unlock(&hash->mutex); if (!result) - result= hash->default_value; + result= def; else result= ((SAFE_HASH_ENTRY*) result)->data; DBUG_PRINT("exit",("data: 0x%lx", result)); @@ -316,6 +317,7 @@ void multi_keycache_free(void) multi_key_cache_search() key key to find (usually table path) uint length Length of key. + def Default value if no key cache NOTES This function is coded in such a way that we will return the @@ -326,11 +328,13 @@ void multi_keycache_free(void) key cache to use */ -KEY_CACHE *multi_key_cache_search(byte *key, uint length) +KEY_CACHE *multi_key_cache_search(byte *key, uint length, + KEY_CACHE *def) { if (!key_cache_hash.hash.records) - return dflt_key_cache; - return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length); + return def; + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); } diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 56f2298a9f0..5f7bf5ff1a8 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -15,9 +15,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#include "my_handler.h" +#include +#include +#include +#include -int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, +int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { @@ -173,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -186,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, { uint length=(uint) (end-a), a_length=length, b_length=length; if (piks && - (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, + (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -234,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool) ((nextflag & (SEARCH_FIND | @@ -481,12 +484,15 @@ end: DESCRIPTION Find the first NULL value in index-suffix values tuple. - TODO Consider optimizing this fuction or its use so we don't search for - NULL values in completely NOT NULL index suffixes. + + TODO + Consider optimizing this function or its use so we don't search for + NULL values in completely NOT NULL index suffixes. RETURN - First key part that has NULL as value in values tuple, or the last key part - (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. + First key part that has NULL as value in values tuple, or the last key + part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain + NULLs. */ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) @@ -504,6 +510,7 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) switch ((enum ha_base_keytype) keyseg->type) { case HA_KEYTYPE_TEXT: case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_BIT: if (keyseg->flag & HA_SPACE_PACK) { int a_length; @@ -516,6 +523,8 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) break; case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: { int a_length; get_key_length(a_length, a); @@ -545,6 +554,9 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) case HA_KEYTYPE_DOUBLE: a= end; break; + case HA_KEYTYPE_END: + DBUG_ASSERT(0); + break; } } return keyseg; diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc new file mode 100644 index 00000000000..46324d0b188 --- /dev/null +++ b/sql/ha_maria.cc @@ -0,0 +1,1836 @@ +/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" +#include +#include +#include "ha_maria.h" +#ifndef MASTER +#include "../srclib/maria/maria_def.h" +#else +#include "../storage/maria/maria_def.h" +#include "../storage/maria/ma_rt_index.h" +#endif + +ulong maria_recover_options= HA_RECOVER_NONE; +static bool ha_maria_init(); + +/* bits in maria_recover_options */ +const char *maria_recover_names[]= +{ + "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS +}; +TYPELIB maria_recover_typelib= +{ + array_elements(maria_recover_names) - 1, "", + maria_recover_names, NULL +}; + +const char *maria_stats_method_names[]= +{ + "nulls_unequal", "nulls_equal", + "nulls_ignored", NullS +}; +TYPELIB maria_stats_method_typelib= +{ + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL +}; + + +/***************************************************************************** +** MARIA tables +*****************************************************************************/ + +static handler *maria_create_handler(TABLE_SHARE * table); + +/* MARIA handlerton */ + +handlerton maria_hton= +{ + MYSQL_HANDLERTON_INTERFACE_VERSION, + "MARIA", + SHOW_OPTION_YES, + "Transactional storage engine, optimized for long running transactions", + DB_TYPE_MARIA, + ha_maria_init, + 0, /* slot */ + 0, /* savepoint size. */ + NULL, /* close_connection */ + NULL, /* savepoint */ + NULL, /* rollback to savepoint */ + NULL, /* release savepoint */ + NULL, /* commit */ + NULL, /* rollback */ + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL, /* rollback_by_xid */ + NULL, /* create_cursor_read_view */ + NULL, /* set_cursor_read_view */ + NULL, /* close_cursor_read_view */ + /* + MARIA doesn't support transactions yet and doesn't have + transaction-dependent context: cursors can survive a commit. + */ + maria_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + maria_panic, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill Files Table */ + HTON_CAN_RECREATE, + NULL, /* binlog_func */ + NULL, /* binlog_log_query */ + NULL /* release_temporary_latches */ +}; + + +static bool ha_maria_init() +{ + return test(maria_init()); +} + +static handler *maria_create_handler(TABLE_SHARE *table) +{ + return new ha_maria(table); +} + + +// collect errors printed by maria_check routines + +static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, + const char *fmt, va_list args) +{ + THD *thd= (THD *) param->thd; + Protocol *protocol= thd->protocol; + uint length, msg_length; + char msgbuf[MARIA_MAX_MSG_BUF]; + char name[NAME_LEN * 2 + 2]; + + msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args); + msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia + + DBUG_PRINT(msg_type, ("message: %s", msgbuf)); + + if (!thd->vio_ok()) + { + sql_print_error(msgbuf); + return; + } + + if (param->testflag & + (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) + { + my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + return; + } + length= (uint) (strxmov(name, param->db_name, ".", param->table_name, + NullS) - name); + protocol->prepare_for_resend(); + protocol->store(name, length, system_charset_info); + protocol->store(param->op_name, system_charset_info); + protocol->store(msg_type, system_charset_info); + protocol->store(msgbuf, msg_length, system_charset_info); + if (protocol->write()) + sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", + msgbuf); + return; +} + +extern "C" { + +volatile int *_ma_killed_ptr(HA_CHECK *param) +{ + /* In theory Unsafe conversion, but should be ok for now */ + return (int*) &(((THD *) (param->thd))->killed); +} + + +void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) +{ + param->error_printed |= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "error", fmt, args); + va_end(args); +} + + +void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "info", fmt, args); + va_end(args); +} + + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) +{ + param->warning_printed= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "warning", fmt, args); + va_end(args); +} + +} + + +ha_maria::ha_maria(TABLE_SHARE *table_arg): +handler(&maria_hton, table_arg), file(0), +int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | + HA_DUPP_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | + HA_FILE_BASED | HA_CAN_GEOMETRY | HA_READ_RND_SAME | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD), +can_enable_indexes(1) +{} + + +static const char *ha_maria_exts[]= +{ + MARIA_NAME_IEXT, + MARIA_NAME_DEXT, + NullS +}; + + +const char **ha_maria::bas_ext() const +{ + return ha_maria_exts; +} + + +const char *ha_maria::index_type(uint key_number) +{ + return ((table->key_info[key_number].flags & HA_FULLTEXT) ? + "FULLTEXT" : + (table->key_info[key_number].flags & HA_SPATIAL) ? + "SPATIAL" : + (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? + "RTREE" : "BTREE"); +} + + +#ifdef HAVE_REPLICATION +int ha_maria::net_read_dump(NET * net) +{ + int data_fd= file->dfile; + int error= 0; + + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (;;) + { + ulong packet_len= my_net_read(net); + if (!packet_len) + break; // end of file + if (packet_len == packet_error) + { + sql_print_error("ha_maria::net_read_dump - read error "); + error= -1; + goto err; + } + if (my_write(data_fd, (byte *) net->read_pos, (uint) packet_len, + MYF(MY_WME | MY_FNABP))) + { + error= errno; + goto err; + } + } +err: + return error; +} + + +int ha_maria::dump(THD * thd, int fd) +{ + MARIA_SHARE *share= file->s; + NET *net= &thd->net; + uint blocksize= share->blocksize; + my_off_t bytes_to_read= share->state.state.data_file_length; + int data_fd= file->dfile; + byte *buf= (byte *) my_malloc(blocksize, MYF(MY_WME)); + if (!buf) + return ENOMEM; + + int error= 0; + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (; bytes_to_read > 0;) + { + uint bytes= my_read(data_fd, buf, blocksize, MYF(MY_WME)); + if (bytes == MY_FILE_ERROR) + { + error= errno; + goto err; + } + + if (fd >= 0) + { + if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP))) + { + error= errno ? errno : EPIPE; + goto err; + } + } + else + { + if (my_net_write(net, (char*) buf, bytes)) + { + error= errno ? errno : EPIPE; + goto err; + } + } + bytes_to_read -= bytes; + } + + if (fd < 0) + { + if (my_net_write(net, "", 0)) + error= errno ? errno : EPIPE; + net_flush(net); + } + +err: + my_free((gptr) buf, MYF(0)); + return error; +} +#endif /* HAVE_REPLICATION */ + + +bool ha_maria::check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_logger_thread) +{ + /* + To be able to open and lock for reading system tables like 'mysql.proc', + when we already have some tables opened and locked, and avoid deadlocks + we have to disallow write-locking of these tables with any other tables. + */ + if (table->s->system_table && + table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && count != 1) + { + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str, + table->s->table_name.str); + return FALSE; + } + return TRUE; +} + + + /* Name is here without an extension */ + +int ha_maria::open(const char *name, int mode, uint test_if_locked) +{ + uint i; + if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER))) + return (my_errno ? my_errno : -1); + + if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) + VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0)); + +#ifdef NOT_USED + if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap) + VOID(maria_extra(file, HA_EXTRA_MMAP, 0)); +#endif + + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) + VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0)); + if (!table->s->db_record_offset) + int_table_flags |= HA_REC_NOT_IN_SEQ; + if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + int_table_flags |= HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *) parser->plugin->info; + } + return (0); +} + + +int ha_maria::close(void) +{ + MARIA_HA *tmp= file; + file= 0; + return maria_close(tmp); +} + + +int ha_maria::write_row(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status); + + /* If we have a timestamp column, update it to the current time */ + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + + /* + If we have an auto_increment column and we are writing a changed row + or a new row, then update the auto_increment value in the record. + */ + if (table->next_number_field && buf == table->record[0]) + update_auto_increment(); + return maria_write(file, buf); +} + + +int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) +{ + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + int error; + HA_CHECK param; + MARIA_SHARE *share= file->s; + const char *old_proc_info= thd->proc_info; + + thd->proc_info= "Checking table"; + mariachk_init(¶m); + param.thd= thd; + param.op_name= "check"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag= check_opt->flags | T_CHECK | T_SILENT; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + + if (!(table->db_stat & HA_READ_ONLY)) + param.testflag |= T_STATISTICS; + param.using_global_keycache= 1; + + if (!maria_is_crashed(file) && + (((param.testflag & T_CHECK_ONLY_CHANGED) && + !(share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR)) && + share->state.open_count == 0) || + ((param.testflag & T_FAST) && (share->state.open_count == + (uint) (share->global_changed ? 1 : + 0))))) + return HA_ADMIN_ALREADY_DONE; + + error= maria_chk_status(¶m, file); // Not fatal + error= maria_chk_size(¶m, file); + if (!error) + error |= maria_chk_del(¶m, file, param.testflag); + if (!error) + error= maria_chk_key(¶m, file); + if (!error) + { + if ((!(param.testflag & T_QUICK) && + ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) || + (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file)) + { + uint old_testflag= param.testflag; + param.testflag |= T_MEDIUM; + if (!(error= init_io_cache(¶m.read_cache, file->dfile, + my_default_record_cache_size, READ_CACHE, + share->pack.header_length, 1, MYF(MY_WME)))) + { + error= maria_chk_data_link(¶m, file, param.testflag & T_EXTEND); + end_io_cache(&(param.read_cache)); + } + param.testflag= old_testflag; + } + } + if (!error) + { + if ((share->state.changed & (STATE_CHANGED | + STATE_CRASHED_ON_REPAIR | + STATE_CRASHED | STATE_NOT_ANALYZED)) || + (param.testflag & T_STATISTICS) || maria_is_crashed(file)) + { + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + pthread_mutex_lock(&share->intern_lock); + share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + if (!(table->db_stat & HA_READ_ONLY)) + error= maria_update_state_info(¶m, file, UPDATE_TIME | UPDATE_OPEN_COUNT | + UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + } + } + else if (!maria_is_crashed(file) && !thd->killed) + { + maria_mark_crashed(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + + thd->proc_info= old_proc_info; + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +/* + Analyze the key distribution in the table + As the table may be only locked for read, we have to take into account that + two threads may do an analyze at the same time! +*/ + +int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) +{ + int error= 0; + HA_CHECK param; + MARIA_SHARE *share= file->s; + + mariachk_init(¶m); + param.thd= thd; + param.op_name= "analyze"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | + T_DONT_CHECK_CHECKSUM); + param.using_global_keycache= 1; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + + if (!(share->state.changed & STATE_NOT_ANALYZED)) + return HA_ADMIN_ALREADY_DONE; + + error= maria_chk_key(¶m, file); + if (!error) + { + pthread_mutex_lock(&share->intern_lock); + error= maria_update_state_info(¶m, file, UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + } + else if (!maria_is_crashed(file) && !thd->killed) + maria_mark_crashed(file); + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt) +{ + HA_CHECK_OPT tmp_check_opt; + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + const char *table_name= table->s->table_name.str; + int error; + const char *errmsg; + DBUG_ENTER("restore"); + + if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, + MARIA_NAME_DEXT)) + DBUG_RETURN(HA_ADMIN_INVALID); + + strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME))) + { + error= HA_ADMIN_FAILED; + errmsg= "Failed in my_copy (Error %d)"; + goto err; + } + + tmp_check_opt.init(); + tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK; + DBUG_RETURN(repair(thd, &tmp_check_opt)); + +err: + { + HA_CHECK param; + mariachk_init(¶m); + param.thd= thd; + param.op_name= "restore"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt) +{ + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + const char *table_name= table->s->table_name.str; + int error; + const char *errmsg; + DBUG_ENTER("ha_maria::backup"); + + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + reg_ext)) + { + errmsg= "Failed in fn_format() for .frm file (errno: %d)"; + error= HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + error= HA_ADMIN_FAILED; + errmsg= "Failed copying .frm file (errno: %d)"; + goto err; + } + + /* Change extension */ + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + MARIA_NAME_DEXT)) + { + errmsg= "Failed in fn_format() for .MYD file (errno: %d)"; + error= HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + errmsg= "Failed copying .MYD file (errno: %d)"; + error= HA_ADMIN_FAILED; + goto err; + } + DBUG_RETURN(HA_ADMIN_OK); + +err: + { + HA_CHECK param; + mariachk_init(¶m); + param.thd= thd; + param.op_name= "backup"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + HA_CHECK param; + ha_rows start_records; + + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + + mariachk_init(¶m); + param.thd= thd; + param.op_name= "repair"; + param.testflag= ((check_opt->flags & ~(T_EXTEND)) | + T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM | + (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT)); + param.sort_buffer_length= check_opt->sort_buffer_size; + start_records= file->state->records; + while ((error= repair(thd, param, 0)) && param.retry_repair) + { + param.retry_repair= 0; + if (test_all_bits(param.testflag, + (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK))) + { + param.testflag &= ~T_RETRY_WITHOUT_QUICK; + sql_print_information("Retrying repair of: '%s' without quick", + table->s->path); + continue; + } + param.testflag &= ~T_QUICK; + if ((param.testflag & T_REP_BY_SORT)) + { + param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP; + sql_print_information("Retrying repair of: '%s' with keycache", + table->s->path); + continue; + } + break; + } + if (!error && start_records != file->state->records && + !(check_opt->flags & T_VERY_SILENT)) + { + char llbuff[22], llbuff2[22]; + sql_print_information("Found %s of %s rows when repairing '%s'", + llstr(file->state->records, llbuff), + llstr(start_records, llbuff2), table->s->path); + } + return error; +} + +int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + HA_CHECK param; + + mariachk_init(¶m); + param.thd= thd; + param.op_name= "optimize"; + param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | + T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX); + param.sort_buffer_length= check_opt->sort_buffer_size; + if ((error= repair(thd, param, 1)) && param.retry_repair) + { + sql_print_warning("Warning: Optimize table got errno %d, retrying", + my_errno); + param.testflag &= ~T_REP_BY_SORT; + error= repair(thd, param, 1); + } + return error; +} + + +int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool optimize) +{ + int error= 0; + uint local_testflag= param.testflag; + bool optimize_done= !optimize, statistics_done= 0; + const char *old_proc_info= thd->proc_info; + char fixed_name[FN_REFLEN]; + MARIA_SHARE *share= file->s; + ha_rows rows= file->state->records; + DBUG_ENTER("ha_maria::repair"); + + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.tmpfile_createflag= O_RDWR | O_TRUNC; + param.using_global_keycache= 1; + param.thd= thd; + param.tmpdir= &mysql_tmpdir_list; + param.out_flag= 0; + strmov(fixed_name, file->filename); + + // Don't lock tables if we have used LOCK TABLE + if (!thd->locked_tables && + maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) + { + _ma_check_print_error(¶m, ER(ER_CANT_LOCK), my_errno); + DBUG_RETURN(HA_ADMIN_FAILED); + } + + if (!optimize || + ((file->state->del || share->state.split != file->state->records) && + (!(param.testflag & T_QUICK) || + !(share->state.changed & STATE_NOT_OPTIMIZED_KEYS)))) + { + ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ? + maria_get_mask_all_keys_active(share->base.keys) : + share->state.key_map); + uint testflag= param.testflag; + if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) && + (local_testflag & T_REP_BY_SORT)) + { + local_testflag |= T_STATISTICS; + param.testflag |= T_STATISTICS; // We get this for free + statistics_done= 1; + if (thd->variables.maria_repair_threads > 1) + { + char buf[40]; + /* TODO: respect maria_repair_threads variable */ + my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map)); + thd->proc_info= buf; + error= maria_repair_parallel(¶m, file, fixed_name, + param.testflag & T_QUICK); + thd->proc_info= "Repair done"; // to reset proc_info, as + // it was pointing to local buffer + } + else + { + thd->proc_info= "Repair by sorting"; + error= maria_repair_by_sort(¶m, file, fixed_name, + param.testflag & T_QUICK); + } + } + else + { + thd->proc_info= "Repair with keycache"; + param.testflag &= ~T_REP_BY_SORT; + error= maria_repair(¶m, file, fixed_name, param.testflag & T_QUICK); + } + param.testflag= testflag; + optimize_done= 1; + } + if (!error) + { + if ((local_testflag & T_SORT_INDEX) && + (share->state.changed & STATE_NOT_SORTED_PAGES)) + { + optimize_done= 1; + thd->proc_info= "Sorting index"; + error= maria_sort_index(¶m, file, fixed_name); + } + if (!statistics_done && (local_testflag & T_STATISTICS)) + { + if (share->state.changed & STATE_NOT_ANALYZED) + { + optimize_done= 1; + thd->proc_info= "Analyzing"; + error= maria_chk_key(¶m, file); + } + else + local_testflag &= ~T_STATISTICS; // Don't update statistics + } + } + thd->proc_info= "Saving state"; + if (!error) + { + if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file)) + { + share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + /* + the following 'if', thought conceptually wrong, + is a useful optimization nevertheless. + */ + if (file->state != &file->s->state.state) + file->s->state.state= *file->state; + if (file->s->base.auto_key) + _ma_update_auto_increment_key(¶m, file, 1); + if (optimize_done) + error= maria_update_state_info(¶m, file, + UPDATE_TIME | UPDATE_OPEN_COUNT | + (local_testflag & + T_STATISTICS ? UPDATE_STAT : 0)); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + if (rows != file->state->records && !(param.testflag & T_VERY_SILENT)) + { + char llbuff[22], llbuff2[22]; + _ma_check_print_warning(¶m, "Number of rows changed from %s to %s", + llstr(rows, llbuff), + llstr(file->state->records, llbuff2)); + } + } + else + { + maria_mark_crashed_on_repair(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + maria_update_state_info(¶m, file, 0); + } + thd->proc_info= old_proc_info; + if (!thd->locked_tables) + maria_lock_database(file, F_UNLCK); + DBUG_RETURN(error ? HA_ADMIN_FAILED : + !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK); +} + + +/* + Assign table indexes to a specific key cache. +*/ + +int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt) +{ + KEY_CACHE *new_key_cache= check_opt->key_cache; + const char *errmsg= 0; + int error= HA_ADMIN_OK; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + DBUG_ENTER("ha_maria::assign_to_keycache"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + /* We only come here when the user did specify an index map */ + key_map kmap; + if (get_key_map_from_key_list(&kmap, table, table_list->use_index)) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + map= kmap.to_ulonglong(); + } + + if ((error= maria_assign_to_key_cache(file, map, new_key_cache))) + { + char buf[STRING_BUFFER_USUAL_SIZE]; + my_snprintf(buf, sizeof(buf), + "Failed to flush to index file (errno: %d)", error); + errmsg= buf; + error= HA_ADMIN_CORRUPT; + } + +err: + if (error != HA_ADMIN_OK) + { + /* Send error to user */ + HA_CHECK param; + mariachk_init(¶m); + param.thd= thd; + param.op_name= "assign_to_keycache"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg); + } + DBUG_RETURN(error); +} + + +/* + Preload pages of the index file for a table into the key cache. +*/ + +int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + const char *errmsg; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + my_bool ignore_leaves= table_list->ignore_leaves; + + DBUG_ENTER("ha_maria::preload_keys"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + key_map kmap; + get_key_map_from_key_list(&kmap, table, table_list->use_index); + if (kmap.is_set_all()) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + if (!kmap.is_clear_all()) + map= kmap.to_ulonglong(); + } + + maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE, + (void*) &thd->variables.preload_buff_size); + + if ((error= maria_preload(file, map, ignore_leaves))) + { + switch (error) { + case HA_ERR_NON_UNIQUE_BLOCK_SIZE: + errmsg= "Indexes use different block sizes"; + break; + case HA_ERR_OUT_OF_MEM: + errmsg= "Failed to allocate buffer"; + break; + default: + char buf[ERRMSGSIZE + 20]; + my_snprintf(buf, ERRMSGSIZE, + "Failed to read from index file (errno: %d)", my_errno); + errmsg= buf; + } + error= HA_ADMIN_FAILED; + goto err; + } + + DBUG_RETURN(HA_ADMIN_OK); + +err: + { + HA_CHECK param; + mariachk_init(¶m); + param.thd= thd; + param.op_name= "preload_keys"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg); + DBUG_RETURN(error); + } +} + + +/* + Disable indexes, making it persistent if requested. + + SYNOPSIS + disable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ disable all non-unique keys + HA_KEY_SWITCH_ALL disable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_maria::disable_indexes(uint mode) +{ + int error; + + if (mode == HA_KEY_SWITCH_ALL) + { + /* call a storage engine function to switch the key map */ + error= maria_disable_indexes(file); + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + maria_extra(file, HA_EXTRA_NO_KEYS, 0); + info(HA_STATUS_CONST); // Read new key info + error= 0; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Enable indexes, making it persistent if requested. + + SYNOPSIS + enable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ enable all non-unique keys + HA_KEY_SWITCH_ALL enable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent + + DESCRIPTION + Enable indexes, which might have been disabled by disable_index() before. + The modes without _SAVE work only if both data and indexes are empty, + since the MARIA repair would enable them persistently. + To be sure in these cases, call handler::delete_all_rows() before. + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + !=0 Error, among others: + HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry. + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_maria::enable_indexes(uint mode) +{ + int error; + + if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys)) + { + /* All indexes are enabled already. */ + return 0; + } + + if (mode == HA_KEY_SWITCH_ALL) + { + error= maria_enable_indexes(file); + /* + Do not try to repair on error, + as this could make the enabled state persistent, + but mode==HA_KEY_SWITCH_ALL forbids it. + */ + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + THD *thd= current_thd; + HA_CHECK param; + const char *save_proc_info= thd->proc_info; + thd->proc_info= "Creating index"; + mariachk_init(¶m); + param.op_name= "recreating_index"; + param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK | + T_CREATE_MISSING_KEYS); + param.myf_rw &= ~MY_WAIT_IF_FULL; + param.sort_buffer_length= thd->variables.maria_sort_buff_size; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + param.tmpdir= &mysql_tmpdir_list; + if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param.retry_repair) + { + sql_print_warning("Warning: Enabling keys got errno %d, retrying", + my_errno); + /* Repairing by sort failed. Now try standard repair method. */ + param.testflag &= ~(T_REP_BY_SORT | T_QUICK); + error= (repair(thd, param, 0) != HA_ADMIN_OK); + /* + If the standard repair succeeded, clear all error messages which + might have been set by the first repair. They can still be seen + with SHOW WARNINGS then. + */ + if (!error) + thd->clear_error(); + } + info(HA_STATUS_CONST); + thd->proc_info= save_proc_info; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Test if indexes are disabled. + + + SYNOPSIS + indexes_are_disabled() + no parameters + + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] +*/ + +int ha_maria::indexes_are_disabled(void) +{ + return maria_indexes_are_disabled(file); +} + + +/* + prepare for a many-rows insert operation + e.g. - disable indexes (if they can be recreated fast) or + activate special bulk-insert optimizations + + SYNOPSIS + start_bulk_insert(rows) + rows Rows to be inserted + 0 if we don't know + + NOTICE + Do not forget to call end_bulk_insert() later! +*/ + +void ha_maria::start_bulk_insert(ha_rows rows) +{ + DBUG_ENTER("ha_maria::start_bulk_insert"); + THD *thd= current_thd; + ulong size= min(thd->variables.read_buff_size, + table->s->avg_row_length * rows); + DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu", + (ulong) rows, size)); + + /* don't enable row cache if too few rows */ + if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE)) + maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size); + + can_enable_indexes= maria_is_all_keys_active(file->s->state.key_map, + file->s->base.keys); + + if (!(specialflag & SPECIAL_SAFE_MODE)) + { + /* + Only disable old index if the table was empty and we are inserting + a lot of rows. + We should not do this for only a few rows as this is slower and + we don't want to update the key statistics based of only a few rows. + */ + if (file->state->records == 0 && can_enable_indexes && + (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES)) + maria_disable_non_unique_index(file, rows); + else + if (!file->bulk_insert && + (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT)) + { + maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows); + } + } + DBUG_VOID_RETURN; +} + + +/* + end special bulk-insert optimizations, + which have been activated by start_bulk_insert(). + + SYNOPSIS + end_bulk_insert() + no arguments + + RETURN + 0 OK + != 0 Error +*/ + +int ha_maria::end_bulk_insert() +{ + maria_end_bulk_insert(file); + int err= maria_extra(file, HA_EXTRA_NO_CACHE, 0); + return err ? err : can_enable_indexes ? + enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0; +} + + +bool ha_maria::check_and_repair(THD *thd) +{ + int error= 0; + int marked_crashed; + char *old_query; + uint old_query_length; + HA_CHECK_OPT check_opt; + DBUG_ENTER("ha_maria::check_and_repair"); + + check_opt.init(); + check_opt.flags= T_MEDIUM | T_AUTO_REPAIR; + // Don't use quick if deleted rows + if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK)) + check_opt.flags |= T_QUICK; + sql_print_warning("Checking table: '%s'", table->s->path); + + old_query= thd->query; + old_query_length= thd->query_length; + pthread_mutex_lock(&LOCK_thread_count); + thd->query= table->s->table_name.str; + thd->query_length= table->s->table_name.length; + pthread_mutex_unlock(&LOCK_thread_count); + + if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt)) + { + sql_print_warning("Recovering table: '%s'", table->s->path); + check_opt.flags= + ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) | + (marked_crashed ? 0 : T_QUICK) | + (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) | + T_AUTO_REPAIR); + if (repair(thd, &check_opt)) + error= 1; + } + pthread_mutex_lock(&LOCK_thread_count); + thd->query= old_query; + thd->query_length= old_query_length; + pthread_mutex_unlock(&LOCK_thread_count); + DBUG_RETURN(error); +} + + +bool ha_maria::is_crashed() const +{ + return (file->s->state.changed & STATE_CRASHED || + (my_disable_locking && file->s->state.open_count)); +} + + +int ha_maria::update_row(const byte * old_data, byte * new_data) +{ + statistic_increment(table->in_use->status_var.ha_update_count, &LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); + return maria_update(file, old_data, new_data); +} + + +int ha_maria::delete_row(const byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_delete_count, &LOCK_status); + return maria_delete(file, buf); +} + + +int ha_maria::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_last(byte * buf, const byte * key, uint key_len) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_next(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_prev(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_prev_count, + &LOCK_status); + int error= maria_rprev(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_first(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); + int error= maria_rfirst(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_last(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_last_count, + &LOCK_status); + int error= maria_rlast(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_next_same(byte * buf, + const byte *key __attribute__ ((unused)), + uint length __attribute__ ((unused))) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext_same(file, buf); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::rnd_init(bool scan) +{ + if (scan) + return maria_scan_init(file); + return maria_extra(file, HA_EXTRA_RESET, 0); +} + + +int ha_maria::rnd_next(byte *buf) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, + &LOCK_status); + int error= maria_scan(file, buf); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::restart_rnd_next(byte *buf, byte *pos) +{ + return rnd_pos(buf, pos); +} + + +int ha_maria::rnd_pos(byte * buf, byte *pos) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_count, + &LOCK_status); + int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length)); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +void ha_maria::position(const byte * record) +{ + my_off_t position= maria_position(file); + my_store_ptr(ref, ref_length, position); +} + + +void ha_maria::info(uint flag) +{ + MARIA_INFO info; + char name_buff[FN_REFLEN]; + + (void) maria_status(file, &info, flag); + if (flag & HA_STATUS_VARIABLE) + { + records= info.records; + deleted= info.deleted; + data_file_length= info.data_file_length; + index_file_length= info.index_file_length; + delete_length= info.delete_length; + check_time= info.check_time; + mean_rec_length= info.mean_reclength; + } + if (flag & HA_STATUS_CONST) + { + TABLE_SHARE *share= table->s; + max_data_file_length= info.max_data_file_length; + max_index_file_length= info.max_index_file_length; + create_time= info.create_time; + sortkey= info.sortkey; + ref_length= info.reflength; + share->db_options_in_use= info.options; + block_size= maria_block_size; + + /* Update share */ + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_lock(&share->mutex); + share->keys_in_use.set_prefix(share->keys); + share->keys_in_use.intersect_extended(info.key_map); + share->keys_for_keyread.intersect(share->keys_in_use); + share->db_record_offset= info.record_offset; + if (share->key_parts) + memcpy((char*) table->key_info[0].rec_per_key, + (char*) info.rec_per_key, + sizeof(table->key_info[0].rec_per_key) * share->key_parts); + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_unlock(&share->mutex); + + /* + Set data_file_name and index_file_name to point at the symlink value + if table is symlinked (Ie; Real name is not same as generated name) + */ + data_file_name= index_file_name= 0; + fn_format(name_buff, file->filename, "", MARIA_NAME_DEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.data_file_name)) + data_file_name= info.data_file_name; + fn_format(name_buff, file->filename, "", MARIA_NAME_IEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.index_file_name)) + index_file_name= info.index_file_name; + } + if (flag & HA_STATUS_ERRKEY) + { + errkey= info.errkey; + my_store_ptr(dupp_ref, ref_length, info.dupp_key_pos); + } + /* Faster to always update, than to do it based on flag */ + update_time= info.update_time; + auto_increment_value= info.auto_increment; +} + + +int ha_maria::extra(enum ha_extra_function operation) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD) + return 0; + return maria_extra(file, operation, 0); +} + + +/* To be used with WRITE_CACHE and EXTRA_CACHE */ + +int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE) + return 0; + return maria_extra(file, operation, (void*) &cache_size); +} + + +int ha_maria::delete_all_rows() +{ + return maria_delete_all_rows(file); +} + + +int ha_maria::delete_table(const char *name) +{ + return maria_delete_table(name); +} + + +int ha_maria::external_lock(THD *thd, int lock_type) +{ + return maria_lock_database(file, !table->s->tmp_table ? + lock_type : ((lock_type == F_UNLCK) ? + F_UNLCK : F_EXTRA_LCK)); +} + + +THR_LOCK_DATA **ha_maria::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK) + file->lock.type= lock_type; + *to++= &file->lock; + return to; +} + + +void ha_maria::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value= auto_increment_value; + } + create_info->data_file_name= data_file_name; + create_info->index_file_name= index_file_name; +} + + +int ha_maria::create(const char *name, register TABLE *table_arg, + HA_CREATE_INFO *info) +{ + int error; + uint i, j, recpos, minpos, fieldpos, temp_length, length, create_flags= 0; + bool found_real_auto_increment= 0; + enum ha_base_keytype type; + char buff[FN_REFLEN]; + KEY *pos; + MARIA_KEYDEF *keydef; + MARIA_COLUMNDEF *recinfo, *recinfo_pos; + HA_KEYSEG *keyseg; + TABLE_SHARE *share= table_arg->s; + uint options= share->db_options_in_use; + DBUG_ENTER("ha_maria::create"); + + type= HA_KEYTYPE_BINARY; // Keep compiler happy + if (!(my_multi_malloc(MYF(MY_WME), + &recinfo, (share->fields * 2 + 2) * + sizeof(MARIA_COLUMNDEF), + &keydef, share->keys * sizeof(MARIA_KEYDEF), + &keyseg, + ((share->key_parts + share->keys) * + sizeof(HA_KEYSEG)), NullS))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + pos= table_arg->key_info; + for (i= 0; i < share->keys; i++, pos++) + { + if (pos->flags & HA_USES_PARSER) + create_flags |= HA_CREATE_RELIES_ON_SQL_LAYER; + keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); + keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? + (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : + pos->algorithm; + keydef[i].seg= keyseg; + keydef[i].keysegs= pos->key_parts; + for (j= 0; j < pos->key_parts; j++) + { + Field *field= pos->key_part[j].field; + type= field->key_type(); + keydef[i].seg[j].flag= pos->key_part[j].key_part_flag; + + if (options & HA_OPTION_PACK_KEYS || + (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY | + HA_SPACE_PACK_USED))) + { + if (pos->key_part[j].length > 8 && + (type == HA_KEYTYPE_TEXT || + type == HA_KEYTYPE_NUM || + (type == HA_KEYTYPE_BINARY && !field->zero_pack()))) + { + /* No blobs here */ + if (j == 0) + keydef[i].flag |= HA_PACK_KEY; + if (!(field->flags & ZEROFILL_FLAG) && + (field->type() == MYSQL_TYPE_STRING || + field->type() == MYSQL_TYPE_VAR_STRING || + ((int) (pos->key_part[j].length - field->decimals())) >= 4)) + keydef[i].seg[j].flag |= HA_SPACE_PACK; + } + else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16)) + keydef[i].flag |= HA_BINARY_PACK_KEY; + } + keydef[i].seg[j].type= (int) type; + keydef[i].seg[j].start= pos->key_part[j].offset; + keydef[i].seg[j].length= pos->key_part[j].length; + keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end= + keydef[i].seg[j].bit_length= 0; + keydef[i].seg[j].bit_pos= 0; + keydef[i].seg[j].language= field->charset()->number; + + if (field->null_ptr) + { + keydef[i].seg[j].null_bit= field->null_bit; + keydef[i].seg[j].null_pos= (uint) (field->null_ptr - + (uchar *) table_arg->record[0]); + } + else + { + keydef[i].seg[j].null_bit= 0; + keydef[i].seg[j].null_pos= 0; + } + if (field->type() == FIELD_TYPE_BLOB || + field->type() == FIELD_TYPE_GEOMETRY) + { + keydef[i].seg[j].flag |= HA_BLOB_PART; + /* save number of bytes used to pack length */ + keydef[i].seg[j].bit_start= (uint) (field->pack_length() - + share->blob_ptr_size); + } + else if (field->type() == FIELD_TYPE_BIT) + { + keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len; + keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs; + keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr - + (uchar *) table_arg->record[0]); + } + } + keyseg += pos->key_parts; + } + + if (table_arg->found_next_number_field) + { + keydef[share->next_number_index].flag |= HA_AUTO_KEY; + found_real_auto_increment= share->next_number_key_offset == 0; + } + + recpos= 0; + recinfo_pos= recinfo; + while (recpos < (uint) share->reclength) + { + Field **field, *found= 0; + minpos= share->reclength; + length= 0; + + for (field= table_arg->field; *field; field++) + { + if ((fieldpos= (*field)->offset()) >= recpos && fieldpos <= minpos) + { + /* skip null fields */ + if (!(temp_length= (*field)->pack_length_in_rec())) + continue; /* Skip null-fields */ + if (!found || fieldpos < minpos || + (fieldpos == minpos && temp_length < length)) + { + minpos= fieldpos; + found= *field; + length= temp_length; + } + } + } + DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d", + found, recpos, minpos, length)); + if (recpos != minpos) + { // Reserved space (Null bits?) + bzero((char*) recinfo_pos, sizeof(*recinfo_pos)); + recinfo_pos->type= (int) FIELD_NORMAL; + recinfo_pos++->length= (uint16) (minpos - recpos); + } + if (!found) + break; + + if (found->flags & BLOB_FLAG) + recinfo_pos->type= (int) FIELD_BLOB; + else if (found->type() == MYSQL_TYPE_VARCHAR) + recinfo_pos->type= FIELD_VARCHAR; + else if (!(options & HA_OPTION_PACK_RECORD)) + recinfo_pos->type= (int) FIELD_NORMAL; + else if (found->zero_pack()) + recinfo_pos->type= (int) FIELD_SKIP_ZERO; + else + recinfo_pos->type= (int) ((length <= 3 || + (found->flags & ZEROFILL_FLAG)) ? + FIELD_NORMAL : + found->type() == MYSQL_TYPE_STRING || + found->type() == MYSQL_TYPE_VAR_STRING ? + FIELD_SKIP_ENDSPACE : FIELD_SKIP_PRESPACE); + if (found->null_ptr) + { + recinfo_pos->null_bit= found->null_bit; + recinfo_pos->null_pos= (uint) (found->null_ptr - + (uchar *) table_arg->record[0]); + } + else + { + recinfo_pos->null_bit= 0; + recinfo_pos->null_pos= 0; + } + (recinfo_pos++)->length= (uint16) length; + recpos= minpos + length; + DBUG_PRINT("loop", ("length: %d type: %d", + recinfo_pos[-1].length, recinfo_pos[-1].type)); + + } + MARIA_CREATE_INFO create_info; + bzero((char*) &create_info, sizeof(create_info)); + create_info.max_rows= share->max_rows; + create_info.reloc_rows= share->min_rows; + create_info.with_auto_increment= found_real_auto_increment; + create_info.auto_increment= (info->auto_increment_value ? + info->auto_increment_value - 1 : (ulonglong) 0); + create_info.data_file_length= ((ulonglong) share->max_rows * + share->avg_row_length); + create_info.data_file_name= info->data_file_name; + create_info.index_file_name= info->index_file_name; + + if (info->options & HA_LEX_CREATE_TMP_TABLE) + create_flags |= HA_CREATE_TMP_TABLE; + if (options & HA_OPTION_PACK_RECORD) + create_flags |= HA_PACK_RECORD; + if (options & HA_OPTION_CHECKSUM) + create_flags |= HA_CREATE_CHECKSUM; + if (options & HA_OPTION_DELAY_KEY_WRITE) + create_flags |= HA_CREATE_DELAY_KEY_WRITE; + + /* TODO: Check that the following fn_format is really needed */ + error= + maria_create(fn_format + (buff, name, "", "", MY_UNPACK_FILENAME | MY_APPEND_EXT), + share->keys, keydef, (uint) (recinfo_pos - recinfo), recinfo, + 0, (MARIA_UNIQUEDEF *) 0, &create_info, create_flags); + + my_free((gptr) recinfo, MYF(0)); + DBUG_RETURN(error); +} + + +int ha_maria::rename_table(const char *from, const char *to) +{ + return maria_rename(from, to); +} + + +ulonglong ha_maria::get_auto_increment() +{ + ulonglong nr; + int error; + byte key[HA_MAX_KEY_LENGTH]; + + if (!table->s->next_number_key_offset) + { // Autoincrement at key-start + ha_maria::info(HA_STATUS_AUTO); + return auto_increment_value; + } + + /* it's safe to call the following if bulk_insert isn't on */ + maria_flush_bulk_insert(file, table->s->next_number_index); + + (void) extra(HA_EXTRA_KEYREAD); + key_copy(key, table->record[0], + table->key_info + table->s->next_number_index, + table->s->next_number_key_offset); + error= maria_rkey(file, table->record[1], (int) table->s->next_number_index, + key, table->s->next_number_key_offset, HA_READ_PREFIX_LAST); + if (error) + nr= 1; + else + { + /* Get data from record[1] */ + nr= ((ulonglong) table->next_number_field-> + val_int_offset(table->s->rec_buff_length) + 1); + } + extra(HA_EXTRA_NO_KEYREAD); + return nr; +} + + +/* + Find out how many rows there is in the given range + + SYNOPSIS + records_in_range() + inx Index to use + min_key Start of range. Null pointer if from first key + max_key End of range. Null pointer if to last key + + NOTES + min_key.flag can have one of the following values: + HA_READ_KEY_EXACT Include the key in the range + HA_READ_AFTER_KEY Don't include key in range + + max_key.flag can have one of the following values: + HA_READ_BEFORE_KEY Don't include key in range + HA_READ_AFTER_KEY Include all 'end_key' values in the range + + RETURN + HA_POS_ERROR Something is wrong with the index tree. + 0 There is no matching keys in the given range + number > 0 There is approximately 'number' matching rows in + the range. +*/ + +ha_rows ha_maria::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key); +} + + +int ha_maria::ft_read(byte * buf) +{ + int error; + + if (!ft_handler) + return -1; + + thread_safe_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); // why ? + + error= ft_handler->please->read_next(ft_handler, (char*) buf); + + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +uint ha_maria::checksum() const +{ + return (uint) file->state->checksum; +} + + +bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != auto_increment_value || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || table_changes == IS_EQUAL_NO) + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_maria.h b/sql/ha_maria.h new file mode 100644 index 00000000000..5387820edc6 --- /dev/null +++ b/sql/ha_maria.h @@ -0,0 +1,141 @@ +/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* class for the the maria handler */ + +#include + +#define HA_RECOVER_NONE 0 /* No automatic recover */ +#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */ +#define HA_RECOVER_BACKUP 2 /* Make a backupfile on recover */ +#define HA_RECOVER_FORCE 4 /* Recover even if we loose rows */ +#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */ + +extern ulong maria_sort_buffer_size; +extern TYPELIB maria_recover_typelib; +extern ulong maria_recover_options; + +class ha_maria :public handler +{ + MARIA_HA *file; + ulong int_table_flags; + char *data_file_name, *index_file_name; + bool can_enable_indexes; + int repair(THD * thd, HA_CHECK ¶m, bool optimize); + +public: + ha_maria(TABLE_SHARE * table_arg); + ~ha_maria() + {} + const char *table_type() const + { return "MARIA"; } + const char *index_type(uint key_number); + const char **bas_ext() const; + ulong table_flags() const + { return int_table_flags; } + ulong index_flags(uint inx, uint part, bool all_parts) const + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | + HA_READ_ORDER | HA_KEYREAD_ONLY); + } + uint max_supported_keys() const + { return MARIA_MAX_KEY; } + uint max_supported_key_length() const + { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const + { return HA_MAX_KEY_LENGTH; } + uint checksum() const; + + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE * table, + uint count, + bool called_by_logger_thread); + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int index_next_same(byte * buf, const byte * key, uint keylen); + int ft_init() + { + if (!ft_handler) + return 1; + ft_handler->please->reinit_search(ft_handler); + return 0; + } + FT_INFO *ft_init_ext(uint flags, uint inx, String * key) + { + return maria_ft_init_search(flags, file, inx, + (byte *) key->ptr(), key->length(), + key->charset(), table->record[0]); + } + int ft_read(byte * buf); + int rnd_init(bool scan); + int rnd_next(byte * buf); + int rnd_pos(byte * buf, byte * pos); + int restart_rnd_next(byte * buf, byte * pos); + void position(const byte * record); + void info(uint); + int extra(enum ha_extra_function operation); + int extra_opt(enum ha_extra_function operation, ulong cache_size); + int external_lock(THD * thd, int lock_type); + int delete_all_rows(void); + int disable_indexes(uint mode); + int enable_indexes(uint mode); + int indexes_are_disabled(void); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); + ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key); + void update_create_info(HA_CREATE_INFO * create_info); + int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); + THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, + enum thr_lock_type lock_type); + ulonglong get_auto_increment(); + int rename_table(const char *from, const char *to); + int delete_table(const char *name); + int check(THD * thd, HA_CHECK_OPT * check_opt); + int analyze(THD * thd, HA_CHECK_OPT * check_opt); + int repair(THD * thd, HA_CHECK_OPT * check_opt); + bool check_and_repair(THD * thd); + bool is_crashed() const; + bool auto_repair() const + { return maria_recover_options != 0; } + int optimize(THD * thd, HA_CHECK_OPT * check_opt); + int restore(THD * thd, HA_CHECK_OPT * check_opt); + int backup(THD * thd, HA_CHECK_OPT * check_opt); + int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt); + int preload_keys(THD * thd, HA_CHECK_OPT * check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); +#ifdef HAVE_REPLICATION + int dump(THD * thd, int fd); + int net_read_dump(NET * net); +#endif +}; diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index ec39ee00efc..c347bc64f61 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -105,7 +105,7 @@ static handler *myisam_create_handler(TABLE_SHARE *table) // collect errors printed by mi_check routines -static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, +static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, const char *fmt, va_list args) { THD* thd = (THD*)param->thd; @@ -146,13 +146,13 @@ static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, extern "C" { -volatile int *killed_ptr(MI_CHECK *param) +volatile int *killed_ptr(HA_CHECK *param) { /* In theory Unsafe conversion, but should be ok for now */ return (int*) &(((THD *)(param->thd))->killed); } -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) { param->error_printed|=1; param->out_flag|= O_DATA_LOST; @@ -162,7 +162,7 @@ void mi_check_print_error(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_info(MI_CHECK *param, const char *fmt,...) +void mi_check_print_info(HA_CHECK *param, const char *fmt,...) { va_list args; va_start(args, fmt); @@ -170,7 +170,7 @@ void mi_check_print_info(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) { param->warning_printed=1; param->out_flag|= O_DATA_LOST; @@ -382,7 +382,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) { if (!file) return HA_ADMIN_INTERNAL_ERROR; int error; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; const char *old_proc_info=thd->proc_info; @@ -393,7 +393,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag = check_opt->flags | T_CHECK | T_SILENT; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(table->db_stat & HA_READ_ONLY)) param.testflag|= T_STATISTICS; @@ -474,7 +474,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) { int error=0; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; myisamchk_init(¶m); @@ -485,7 +485,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); param.using_global_keycache = 1; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(share->state.changed & STATE_NOT_ANALYZED)) return HA_ADMIN_ALREADY_DONE; @@ -531,7 +531,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "restore"; @@ -591,7 +591,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "backup"; @@ -607,7 +607,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt) { int error; - MI_CHECK param; + HA_CHECK param; ha_rows start_records; if (!file) return HA_ADMIN_INTERNAL_ERROR; @@ -657,7 +657,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) { int error; if (!file) return HA_ADMIN_INTERNAL_ERROR; - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd = thd; @@ -676,7 +676,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) } -int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool optimize) +int ha_myisam::repair(THD *thd, HA_CHECK ¶m, bool optimize) { int error=0; uint local_testflag=param.testflag; @@ -854,7 +854,7 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt) if (error != HA_ADMIN_OK) { /* Send error to user */ - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "assign_to_keycache"; @@ -922,7 +922,7 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "preload_keys"; @@ -1029,7 +1029,7 @@ int ha_myisam::enable_indexes(uint mode) else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) { THD *thd=current_thd; - MI_CHECK param; + HA_CHECK param; const char *save_proc_info=thd->proc_info; thd->proc_info="Creating index"; myisamchk_init(¶m); @@ -1038,7 +1038,7 @@ int ha_myisam::enable_indexes(uint mode) T_CREATE_MISSING_KEYS); param.myf_rw&= ~MY_WAIT_IF_FULL; param.sort_buffer_length= thd->variables.myisam_sort_buff_size; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; param.tmpdir=&mysql_tmpdir_list; if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair) { @@ -1685,7 +1685,7 @@ ulonglong ha_myisam::get_auto_increment() { ulonglong nr; int error; - byte key[MI_MAX_KEY_LENGTH]; + byte key[HA_MAX_KEY_LENGTH]; if (!table->s->next_number_key_offset) { // Autoincrement at key-start diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index 86efed27478..7a6522dd3ef 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -22,6 +22,7 @@ /* class for the the myisam handler */ #include +#include #include #define HA_RECOVER_NONE 0 /* No automatic recover */ @@ -40,7 +41,7 @@ class ha_myisam: public handler ulong int_table_flags; char *data_file_name, *index_file_name; bool can_enable_indexes; - int repair(THD *thd, MI_CHECK ¶m, bool optimize); + int repair(THD *thd, HA_CHECK ¶m, bool optimize); public: ha_myisam(TABLE_SHARE *table_arg); @@ -56,8 +57,8 @@ class ha_myisam: public handler HA_READ_ORDER | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MI_MAX_KEY; } - uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } - uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } uint checksum() const; virtual bool check_if_locking_is_allowed(uint sql_command, diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h index 4327b1c17b9..db8002ceccf 100644 --- a/sql/ha_myisammrg.h +++ b/sql/ha_myisammrg.h @@ -47,8 +47,8 @@ class ha_myisammrg: public handler HA_READ_ORDER | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MI_MAX_KEY; } - uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } - uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } double scan_time() { return ulonglong2double(data_file_length) / IO_SIZE + file->tables; } diff --git a/sql/handler.h b/sql/handler.h index 261a813bbfa..83f8671df93 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -21,6 +21,7 @@ #pragma interface /* gcc class implementation */ #endif +#include #include #include @@ -233,6 +234,7 @@ enum legacy_db_type DB_TYPE_BLACKHOLE_DB, DB_TYPE_PARTITION_DB, DB_TYPE_BINLOG, + DB_TYPE_MARIA, DB_TYPE_DEFAULT=127 // Must be last }; diff --git a/sql/item_func.h b/sql/item_func.h index ccbbbab1df4..89a2b9a779b 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -1261,7 +1261,6 @@ public: /* for fulltext search */ -#include class Item_func_match :public Item_real_func { diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index f79472a54f8..41b73378d47 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1440,6 +1440,12 @@ extern handlerton partition_hton; #else extern SHOW_COMP_OPTION have_partition_db; #endif +#ifdef WITH_MARIA_STORAGE_ENGINE +extern handlerton maria_hton; +#define have_maria maria_hton.state +#else +extern SHOW_COMP_OPTION have_maria; +#endif extern handlerton myisam_hton; extern handlerton myisammrg_hton; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 71067630535..98378ed7eb4 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -27,6 +27,9 @@ #include "event.h" #include "ha_myisam.h" +#ifdef WITH_MARIA_STORAGE_ENGINE +#include "ha_maria.h" +#endif #ifdef WITH_INNOBASE_STORAGE_ENGINE #define OPT_INNODB_DEFAULT 1 @@ -529,6 +532,7 @@ char *mysqld_unix_port, *opt_mysql_tmpdir; const char **errmesg; /* Error messages */ const char *myisam_recover_options_str="OFF"; const char *myisam_stats_method_str="nulls_unequal"; +const char *maria_stats_method_str="nulls_unequal"; /* name of reference on left espression in rewritten IN subquery */ const char *in_left_expr_name= ""; /* name of additional condition */ @@ -4729,10 +4733,17 @@ enum options_mysqld OPT_MAX_LENGTH_FOR_SORT_DATA, OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE, OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, + OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, - OPT_MYISAM_USE_MMAP, + OPT_MYISAM_USE_MMAP, OPT_MYISAM_REPAIR_THREADS, OPT_MYISAM_STATS_METHOD, + + OPT_MARIA_BLOCK_SIZE, + OPT_MARIA_MAX_SORT_FILE_SIZE, OPT_MARIA_SORT_BUFFER_SIZE, + OPT_MARIA_USE_MMAP, OPT_MARIA_REPAIR_THREADS, + OPT_MARIA_STATS_METHOD, + OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT, OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT, OPT_OPEN_FILES_LIMIT, @@ -4746,7 +4757,7 @@ enum options_mysqld OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, - OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS, + OPT_WAIT_TIMEOUT, OPT_INNODB_MIRRORED_LOG_GROUPS, OPT_INNODB_LOG_FILES_IN_GROUP, OPT_INNODB_LOG_FILE_SIZE, @@ -5932,6 +5943,50 @@ log and this option does nothing anymore.", 0 #endif , 0, 2, 0, 1, 0}, +#ifdef WITH_MARIA_STORAGE_ENGINE + {"maria_block_size", OPT_MARIA_BLOCK_SIZE, + "Block size to be used for MARIA index pages.", + (gptr*) &maria_block_size, + (gptr*) &maria_block_size, 0, GET_ULONG, REQUIRED_ARG, + MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, + 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + {"maria_key_buffer_size", OPT_KEY_BUFFER_SIZE, + "The size of the buffer used for index blocks for Maria tables. Increase " + "this to get better index handling (for all reads and multiple writes) to " + "as much as you can afford; 64M on a 256M machine that mainly runs MySQL " + "is quite common.", + (gptr*) &maria_key_cache_var.param_buff_size, (gptr*) 0, + 0, (GET_ULL | GET_ASK_ADDR), + REQUIRED_ARG, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, MALLOC_OVERHEAD, + IO_SIZE, 0}, + {"maria_max_sort_file_size", OPT_MARIA_MAX_SORT_FILE_SIZE, + "Don't use the fast sort index method to created index if the temporary " + "file would get bigger than this.", + (gptr*) &global_system_variables.maria_max_sort_file_size, + (gptr*) &max_system_variables.maria_max_sort_file_size, 0, + GET_ULL, REQUIRED_ARG, (longlong) LONG_MAX, 0, (ulonglong) MAX_FILE_SIZE, + 0, 1024*1024, 0}, + {"maria_repair_threads", OPT_MARIA_REPAIR_THREADS, + "Number of threads to use when repairing maria tables. The value of 1 " + "disables parallel repair.", + (gptr*) &global_system_variables.maria_repair_threads, + (gptr*) &max_system_variables.maria_repair_threads, 0, + GET_ULONG, REQUIRED_ARG, 1, 1, ~0L, 0, 1, 0}, + {"maria_sort_buffer_size", OPT_MARIA_SORT_BUFFER_SIZE, + "The buffer that is allocated when sorting the index when doing a REPAIR " + "or when creating indexes with CREATE INDEX or ALTER TABLE.", + (gptr*) &global_system_variables.maria_sort_buff_size, + (gptr*) &max_system_variables.maria_sort_buff_size, 0, + GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, + {"maria_stats_method", OPT_MARIA_STATS_METHOD, + "Specifies how maria index statistics collection code should threat NULLs. " + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"max_allowed_packet", OPT_MAX_ALLOWED_PACKET, "Max packetlength to send/receive from to server.", (gptr*) &global_system_variables.max_allowed_packet, @@ -6028,12 +6083,6 @@ The minimum value for this variable is 4096.", (gptr*) &myisam_data_pointer_size, (gptr*) &myisam_data_pointer_size, 0, GET_ULONG, REQUIRED_ARG, 6, 2, 7, 0, 1, 0}, - {"myisam_max_extra_sort_file_size", OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, - "Deprecated option", - (gptr*) &global_system_variables.myisam_max_extra_sort_file_size, - (gptr*) &max_system_variables.myisam_max_extra_sort_file_size, - 0, GET_ULL, REQUIRED_ARG, (ulonglong) MI_MAX_TEMP_LENGTH, - 0, (ulonglong) MAX_FILE_SIZE, 0, 1, 0}, {"myisam_max_sort_file_size", OPT_MYISAM_MAX_SORT_FILE_SIZE, "Don't use the fast sort index method to created index if the temporary file would get bigger than this.", (gptr*) &global_system_variables.myisam_max_sort_file_size, @@ -6997,15 +7046,24 @@ static void mysql_init_variables(void) query_id= thread_id= 1L; strmov(server_version, MYSQL_SERVER_VERSION); myisam_recover_options_str= sql_mode_str= "OFF"; - myisam_stats_method_str= "nulls_unequal"; + myisam_stats_method_str= maria_stats_method_str= "nulls_unequal"; my_bind_addr = htonl(INADDR_ANY); threads.empty(); thread_cache.empty(); key_caches.empty(); if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str, - default_key_cache_base.length))) + default_key_cache_base.length))) exit(1); - multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ +#ifdef WITH_MARIA_STORAGE_ENGINE + if (!(maria_key_cache= get_or_create_key_cache(maria_key_cache_base.str, + maria_key_cache_base.length))) + exit(1); + maria_key_cache->param_buff_size= maria_key_cache_var.param_buff_size; + maria_key_cache->param_block_size= maria_block_size; +#endif + + /* set key_cache_hash.default_value = dflt_key_cache */ + multi_keycache_init(); /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); @@ -7692,7 +7750,6 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), int method; LINT_INIT(method_conv); - myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument); diff --git a/sql/set_var.cc b/sql/set_var.cc index f2694f651f4..c8c35d52099 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -55,6 +55,9 @@ #include #include #include +#ifdef WITH_MARIA_STORAGE_ENGINE +#include +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ extern bool berkeley_shared_data; @@ -150,6 +153,7 @@ static void fix_max_join_size(THD *thd, enum_var_type type); static void fix_query_cache_size(THD *thd, enum_var_type type); static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type); static void fix_myisam_max_sort_file_size(THD *thd, enum_var_type type); +static void fix_maria_max_sort_file_size(THD *thd, enum_var_type type); static void fix_max_binlog_size(THD *thd, enum_var_type type); static void fix_max_relay_log_size(THD *thd, enum_var_type type); static void fix_max_connections(THD *thd, enum_var_type type); @@ -338,6 +342,14 @@ sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method", &myisam_stats_method_typelib, NULL); +sys_var_thd_ulonglong sys_maria_max_sort_file_size("maria_max_sort_file_size", &SV::maria_max_sort_file_size, fix_maria_max_sort_file_size, 1); +sys_var_thd_ulong sys_maria_repair_threads("maria_repair_threads", &SV::maria_repair_threads); +sys_var_thd_ulong sys_maria_sort_buffer_size("maria_sort_buffer_size", &SV::maria_sort_buff_size); +sys_var_thd_enum sys_maria_stats_method("maria_stats_method", + &SV::maria_stats_method, + &myisam_stats_method_typelib, + NULL); + sys_var_thd_ulong sys_net_buffer_length("net_buffer_length", &SV::net_buffer_length); sys_var_thd_ulong sys_net_read_timeout("net_read_timeout", @@ -637,6 +649,7 @@ sys_var_have_variable sys_have_federated_db("have_federated_engine", &have_federated_db); sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_maria("have_maria", &have_maria); sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); sys_var_have_variable sys_have_partition_db("have_partitioning", @@ -760,6 +773,7 @@ SHOW_VAR init_vars[]= { {sys_have_federated_db.name,(char*) &have_federated_db, SHOW_HAVE}, {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_maria.name, (char*) &have_maria, SHOW_HAVE}, {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, @@ -834,6 +848,15 @@ SHOW_VAR init_vars[]= { {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS}, {"lower_case_file_system", (char*) &lower_case_file_system, SHOW_MY_BOOL}, {"lower_case_table_names", (char*) &lower_case_table_names, SHOW_INT}, + + {sys_maria_max_sort_file_size.name, (char*) &sys_maria_max_sort_file_size, + SHOW_SYS}, + {sys_maria_repair_threads.name, (char*) &sys_maria_repair_threads, + SHOW_SYS}, + {sys_maria_sort_buffer_size.name, (char*) &sys_maria_sort_buffer_size, + SHOW_SYS}, + {sys_maria_stats_method.name, (char*) &sys_maria_stats_method, SHOW_SYS}, + {sys_max_allowed_packet.name,(char*) &sys_max_allowed_packet, SHOW_SYS}, {sys_max_binlog_cache_size.name,(char*) &sys_max_binlog_cache_size, SHOW_SYS}, {sys_max_binlog_size.name, (char*) &sys_max_binlog_size, SHOW_SYS}, @@ -1116,6 +1139,16 @@ fix_myisam_max_sort_file_size(THD *thd, enum_var_type type) (my_off_t) global_system_variables.myisam_max_sort_file_size; } +static void +fix_maria_max_sort_file_size(THD *thd, enum_var_type type) +{ +#ifdef WITH_MARIA_STORAGE_ENGINE + maria_max_temp_length= + (my_off_t) global_system_variables.myisam_max_sort_file_size; +#endif +} + + /* Set the OPTION_BIG_SELECTS flag if max_join_size == HA_POS_ERROR */ @@ -2299,6 +2332,7 @@ void sys_var_collation_server::set_default(THD *thd, enum_var_type type) LEX_STRING default_key_cache_base= {(char *) "default", 7 }; +LEX_STRING maria_key_cache_base= {(char *) "maria", 5 }; static KEY_CACHE zero_key_cache; @@ -2308,7 +2342,7 @@ KEY_CACHE *get_key_cache(LEX_STRING *cache_name) if (!cache_name || ! cache_name->length) cache_name= &default_key_cache_base; return ((KEY_CACHE*) find_named(&key_caches, - cache_name->str, cache_name->length, 0)); + cache_name->str, cache_name->length, 0)); } diff --git a/sql/set_var.h b/sql/set_var.h index f62d6ce8d2a..53b14369116 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -985,6 +985,7 @@ public: extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; +extern LEX_STRING maria_key_cache_base; /* For sql_yacc */ struct sys_var_with_base diff --git a/sql/sql_class.h b/sql/sql_class.h index cc1c5488ae5..580a651b2c2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -180,7 +180,7 @@ class Time_zone; struct system_variables { - ulonglong myisam_max_extra_sort_file_size; + ulonglong maria_max_sort_file_size; ulonglong myisam_max_sort_file_size; ha_rows select_limit; ha_rows max_join_size; @@ -195,6 +195,9 @@ struct system_variables ulong max_sort_length; ulong max_tmp_tables; ulong max_insert_delayed_threads; + ulong maria_repair_threads; + ulong maria_sort_buff_size; + ulong maria_stats_method; ulong multi_range_count; ulong myisam_repair_threads; ulong myisam_sort_buff_size; diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 1831c4a2f3d..e39ea1aaf38 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -35,7 +35,9 @@ the callback function 'unpack_addon_fields'. */ -typedef struct st_sort_addon_field { /* Sort addon packed field */ +typedef struct st_sort_addon_field +{ + /* Sort addon packed field */ Field *field; /* Original field */ uint offset; /* Offset from the last sorted field */ uint null_offset; /* Offset to to null bit from the last sorted field */ @@ -43,13 +45,6 @@ typedef struct st_sort_addon_field { /* Sort addon packed field */ uint8 null_bit; /* Null bit mask for the field */ } SORT_ADDON_FIELD; -typedef struct st_buffpek { /* Struktur om sorteringsbuffrarna */ - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; typedef struct st_sort_param { uint rec_length; /* Length of sorted records */ diff --git a/storage/Makefile.am b/storage/Makefile.am index 95c49b50890..8c68f105a16 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -21,7 +21,7 @@ AUTOMAKE_OPTIONS = foreign # These are built from source in the Docs directory EXTRA_DIST = SUBDIRS = -DIST_SUBDIRS = . csv example bdb heap innobase myisam myisammrg ndb archive +DIST_SUBDIRS = . csv example bdb heap innobase myisam myisammrg maria ndb archive # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 38575d26242..e9f3d382595 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -112,7 +112,8 @@ handlerton tina_hton= { NULL, /* Fill FILES Table */ HTON_CAN_RECREATE, NULL, /* binlog_func */ - NULL /* binlog_log_query */ + NULL, /* binlog_log_query */ + NULL /* release_temporary_latches */ }; /***************************************************************************** diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am new file mode 100644 index 00000000000..bf22428c18f --- /dev/null +++ b/storage/maria/Makefile.am @@ -0,0 +1,106 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c cmakelists.txt +pkgdata_DATA = ma_test_all ma_test_all.res + +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include +LDADD = @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +pkglib_LIBRARIES = libmaria.a +bin_PROGRAMS = maria_chk maria_log maria_pack maria_ftdump +maria_chk_DEPENDENCIES= $(LIBRARIES) +maria_log_DEPENDENCIES= $(LIBRARIES) +maria_pack_DEPENDENCIES=$(LIBRARIES) +noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test +noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h +ma_test1_DEPENDENCIES= $(LIBRARIES) +ma_test2_DEPENDENCIES= $(LIBRARIES) +ma_test3_DEPENDENCIES= $(LIBRARIES) +#ma_ft_test1_DEPENDENCIES= $(LIBRARIES) +#ma_ft_eval_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_DEPENDENCIES= $(LIBRARIES) +ma_rt_test_DEPENDENCIES= $(LIBRARIES) +ma_sp_test_DEPENDENCIES= $(LIBRARIES) +libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ + ma_rnext.c ma_rnext_same.c \ + ma_search.c ma_page.c ma_key.c ma_locking.c \ + ma_rrnd.c ma_scan.c ma_cache.c \ + ma_statrec.c ma_packrec.c ma_dynrec.c \ + ma_update.c ma_write.c ma_unique.c \ + ma_delete.c \ + ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \ + ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\ + ma_range.c ma_dbug.c ma_checksum.c ma_log.c \ + ma_changed.c ma_static.c ma_delete_all.c \ + ma_delete_table.c ma_rename.c ma_check.c \ + ma_keycache.c ma_preload.c ma_ft_parser.c \ + ma_ft_update.c ma_ft_boolean_search.c \ + ma_ft_nlq_search.c ft_maria.c ma_sort.c \ + ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ + ma_sp_key.c +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? +DEFS = + +SUFFIXES = .sh + +.sh: + @RM@ -f $@ $@-t + @SED@ \ + -e 's!@''bindir''@!$(bindir)!g' \ + -e 's!@''scriptdir''@!$(bindir)!g' \ + -e 's!@''prefix''@!$(prefix)!g' \ + -e 's!@''datadir''@!$(datadir)!g' \ + -e 's!@''localstatedir''@!$(localstatedir)!g' \ + -e 's!@''libexecdir''@!$(libexecdir)!g' \ + -e 's!@''CC''@!@CC@!'\ + -e 's!@''CXX''@!@CXX@!'\ + -e 's!@''GXX''@!@GXX@!'\ + -e 's!@''PERL''@!@PERL@!' \ + -e 's!@''CFLAGS''@!@SAVE_CFLAGS@!'\ + -e 's!@''CXXFLAGS''@!@SAVE_CXXFLAGS@!'\ + -e 's!@''LDFLAGS''@!@SAVE_LDFLAGS@!'\ + -e 's!@''VERSION''@!@VERSION@!' \ + -e 's!@''MYSQL_SERVER_SUFFIX''@!@MYSQL_SERVER_SUFFIX@!' \ + -e 's!@''COMPILATION_COMMENT''@!@COMPILATION_COMMENT@!' \ + -e 's!@''MACHINE_TYPE''@!@MACHINE_TYPE@!' \ + -e 's!@''HOSTNAME''@!@HOSTNAME@!' \ + -e 's!@''SYSTEM_TYPE''@!@SYSTEM_TYPE@!' \ + -e 's!@''CHECK_PID''@!@CHECK_PID@!' \ + -e 's!@''FIND_PROC''@!@FIND_PROC@!' \ + -e 's!@''MYSQLD_DEFAULT_SWITCHES''@!@MYSQLD_DEFAULT_SWITCHES@!' \ + -e 's!@''MYSQL_UNIX_ADDR''@!@MYSQL_UNIX_ADDR@!' \ + -e 's!@''TARGET_LINUX''@!@TARGET_LINUX@!' \ + -e "s!@""CONF_COMMAND""@!@CONF_COMMAND@!" \ + -e 's!@''MYSQLD_USER''@!@MYSQLD_USER@!' \ + -e 's!@''sysconfdir''@!@sysconfdir@!' \ + -e 's!@''SHORT_MYSQL_INTRO''@!@SHORT_MYSQL_INTRO@!' \ + -e 's!@''SHARED_LIB_VERSION''@!@SHARED_LIB_VERSION@!' \ + -e 's!@''MYSQL_BASE_VERSION''@!@MYSQL_BASE_VERSION@!' \ + -e 's!@''MYSQL_NO_DASH_VERSION''@!@MYSQL_NO_DASH_VERSION@!' \ + -e 's!@''MYSQL_TCP_PORT''@!@MYSQL_TCP_PORT@!' \ + -e 's!@''PERL_DBI_VERSION''@!@PERL_DBI_VERSION@!' \ + -e 's!@''PERL_DBD_VERSION''@!@PERL_DBD_VERSION@!' \ + -e 's!@''PERL_DATA_DUMPER''@!@PERL_DATA_DUMPER@!' \ + $< > $@-t + @CHMOD@ +x $@-t + @MV@ $@-t $@ + +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/storage/maria/cmakelists.txt b/storage/maria/cmakelists.txt new file mode 100644 index 00000000000..3ba7aba4555 --- /dev/null +++ b/storage/maria/cmakelists.txt @@ -0,0 +1,26 @@ +SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") +SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) +ADD_LIBRARY(myisam ft_boolean_search.c ft_nlq_search.c ft_parser.c ft_static.c ft_stem.c + ft_stopwords.c ft_update.c mi_cache.c mi_changed.c mi_check.c + mi_checksum.c mi_close.c mi_create.c mi_dbug.c mi_delete.c + mi_delete_all.c mi_delete_table.c mi_dynrec.c mi_extra.c mi_info.c + mi_key.c mi_keycache.c mi_locking.c mi_log.c mi_open.c + mi_packrec.c mi_page.c mi_panic.c mi_preload.c mi_range.c mi_rename.c + mi_rfirst.c mi_rlast.c mi_rnext.c mi_rnext_same.c mi_rprev.c mi_rrnd.c + mi_rsame.c mi_rsamepos.c mi_scan.c mi_search.c mi_static.c mi_statrec.c + mi_unique.c mi_update.c mi_write.c rt_index.c rt_key.c rt_mbr.c + rt_split.c sort.c sp_key.c ft_eval.h myisamdef.h rt_index.h mi_rkey.c) + +ADD_EXECUTABLE(myisam_ftdump myisam_ftdump.c) +TARGET_LINK_LIBRARIES(myisam_ftdump myisam mysys dbug strings zlib wsock32) + +ADD_EXECUTABLE(myisamchk myisamchk.c) +TARGET_LINK_LIBRARIES(myisamchk myisam mysys dbug strings zlib wsock32) + +ADD_EXECUTABLE(myisamlog myisamlog.c) +TARGET_LINK_LIBRARIES(myisamlog myisam mysys dbug strings zlib wsock32) + +ADD_EXECUTABLE(myisampack myisampack.c) +TARGET_LINK_LIBRARIES(myisampack myisam mysys dbug strings zlib wsock32) diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c new file mode 100644 index 00000000000..7104c6704ba --- /dev/null +++ b/storage/maria/ft_maria.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and maria +*/ + +#include "ma_ftdefs.h" + +FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query, + query_len, cs); + else + res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len, + flags, record); + return res; +} + +const struct _ft_vft _ma_ft_vft_nlq = { + maria_ft_nlq_read_next, maria_ft_nlq_find_relevance, + maria_ft_nlq_close_search, maria_ft_nlq_get_relevance, + maria_ft_nlq_reinit_search +}; +const struct _ft_vft _ma_ft_vft_boolean = { + maria_ft_boolean_read_next, maria_ft_boolean_find_relevance, + maria_ft_boolean_close_search, maria_ft_boolean_get_relevance, + maria_ft_boolean_reinit_search +}; + diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c new file mode 100644 index 00000000000..d6061c647ec --- /dev/null +++ b/storage/maria/ma_cache.c @@ -0,0 +1,108 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions for read record cacheing with maria + Used for reading dynamic/compressed records from datafile. + + Can fetch data directly from file (outside cache), + if reading a small chunk straight before the cached part (with possible + overlap). + + Can be explicitly asked not to use cache (by not setting READING_NEXT in + flag) - useful for occasional out-of-cache reads, when the next read is + expected to hit the cache again. + + Allows "partial read" errors in the record header (when READING_HEADER flag + is set) - unread part is bzero'ed + + Note: out-of-cache reads are enabled for shared IO_CACHE's too, + as these reads will be cached by OS cache (and my_pread is always atomic) +*/ + + +#include "maria_def.h" + +int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, uint length, + int flag) +{ + uint read_length,in_buff_length; + my_off_t offset; + char *in_buff_pos; + DBUG_ENTER("_ma_read_cache"); + + if (pos < info->pos_in_file) + { + read_length=length; + if ((my_off_t) read_length > (my_off_t) (info->pos_in_file-pos)) + read_length=(uint) (info->pos_in_file-pos); + info->seek_not_done=1; + if (my_pread(info->file,buff,read_length,pos,MYF(MY_NABP))) + DBUG_RETURN(1); + if (!(length-=read_length)) + DBUG_RETURN(0); + pos+=read_length; + buff+=read_length; + } + if (pos >= info->pos_in_file && + (offset= (my_off_t) (pos - info->pos_in_file)) < + (my_off_t) (info->read_end - info->request_pos)) + { + in_buff_pos=info->request_pos+(uint) offset; + in_buff_length= min(length,(uint) (info->read_end-in_buff_pos)); + memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length); + if (!(length-=in_buff_length)) + DBUG_RETURN(0); + pos+=in_buff_length; + buff+=in_buff_length; + } + else + in_buff_length=0; + if (flag & READING_NEXT) + { + if (pos != (info->pos_in_file + + (uint) (info->read_end - info->request_pos))) + { + info->pos_in_file=pos; /* Force start here */ + info->read_pos=info->read_end=info->request_pos; /* Everything used */ + info->seek_not_done=1; + } + else + info->read_pos=info->read_end; /* All block used */ + if (!(*info->read_function)(info,buff,length)) + DBUG_RETURN(0); + read_length=info->error; + } + else + { + info->seek_not_done=1; + if ((read_length=my_pread(info->file,buff,length,pos,MYF(0))) == length) + DBUG_RETURN(0); + } + if (!(flag & READING_HEADER) || (int) read_length == -1 || + read_length+in_buff_length < 3) + { + DBUG_PRINT("error", + ("Error %d reading next-multi-part block (Got %d bytes)", + my_errno, (int) read_length)); + if (!my_errno || my_errno == -1) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length - + read_length); + DBUG_RETURN(0); +} /* _ma_read_cache */ diff --git a/storage/maria/ma_changed.c b/storage/maria/ma_changed.c new file mode 100644 index 00000000000..9e86212baa6 --- /dev/null +++ b/storage/maria/ma_changed.c @@ -0,0 +1,34 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Check if somebody has changed table since last check. */ + +#include "maria_def.h" + + /* Return 0 if table isn't changed */ + +int maria_is_changed(MARIA_HA *info) +{ + int result; + DBUG_ENTER("maria_is_changed"); + if (fast_ma_readinfo(info)) + DBUG_RETURN(-1); + VOID(_ma_writeinfo(info,0)); + result=(int) info->data_changed; + info->data_changed=0; + DBUG_PRINT("exit",("result: %d",result)); + DBUG_RETURN(result); +} diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c new file mode 100644 index 00000000000..4680caed2da --- /dev/null +++ b/storage/maria/ma_check.c @@ -0,0 +1,4301 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +#include "ma_ftdefs.h" +#include +#include +#include +#include +#ifdef HAVE_SYS_VADVISE_H +#include +#endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif +#include "ma_rt_index.h" + +#ifndef USE_RAID +#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) +#define my_raid_delete(A,B,C) my_delete(A,B) +#endif + + /* Functions defined in this file */ + +static int check_k_link(HA_CHECK *param, MARIA_HA *info,uint nr); +static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, + my_off_t page, uchar *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level); +static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo); +static ha_checksum calc_checksum(ha_rows count); +static int writekeys(HA_CHECK *param, MARIA_HA *info,byte *buff, + my_off_t filepos); +static int sort_one_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file); +static int sort_key_read(MARIA_SORT_PARAM *sort_param,void *key); +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param,void *key); +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param); +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,const void *b); +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, const void *a); +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a); +static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + uchar *key); +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + reg1 SORT_KEY_BLOCKS *key_block, + uchar *key, my_off_t prev_block); +static int sort_delete_record(MARIA_SORT_PARAM *sort_param); +/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length); +static ha_checksum maria_byte_checksum(const byte *buf, uint length); +static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); + +void mariachk_init(HA_CHECK *param) +{ + bzero((gptr) param,sizeof(*param)); + param->opt_follow_links=1; + param->keys_in_use= ~(ulonglong) 0; + param->search_after_block=HA_OFFSET_ERROR; + param->auto_increment_value= 0; + param->use_buffers=USE_BUFFER_INIT; + param->read_buffer_length=READ_BUFFER_INIT; + param->write_buffer_length=READ_BUFFER_INIT; + param->sort_buffer_length=SORT_BUFFER_INIT; + param->sort_key_blocks=BUFFERS_WHEN_SORTING; + param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; + param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL); + param->start_check_pos=0; + param->max_record_length= LONGLONG_MAX; + param->key_cache_block_size= KEY_CACHE_BLOCK_SIZE; + param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; +} + + /* Check the status flags for the table */ + +int maria_chk_status(HA_CHECK *param, register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + + if (maria_is_crashed_on_repair(info)) + _ma_check_print_warning(param, + "Table is marked as crashed and last repair failed"); + else if (maria_is_crashed(info)) + _ma_check_print_warning(param, + "Table is marked as crashed"); + if (share->state.open_count != (uint) (info->s->global_changed ? 1 : 0)) + { + /* Don't count this as a real warning, as check can correct this ! */ + uint save=param->warning_printed; + _ma_check_print_warning(param, + share->state.open_count==1 ? + "%d client is using or hasn't closed the table properly" : + "%d clients are using or haven't closed the table properly", + share->state.open_count); + /* If this will be fixed by the check, forget the warning */ + if (param->testflag & T_UPDATE_STATE) + param->warning_printed=save; + } + return 0; +} + + /* Check delete links */ + +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag) +{ + reg2 ha_rows i; + uint delete_link_length; + my_off_t empty,next_link,old_link; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_del"); + + LINT_INIT(old_link); + param->record_checksum=0; + delete_link_length=((info->s->options & HA_OPTION_PACK_RECORD) ? 20 : + info->s->rec_reflength+1); + + if (!(test_flag & T_SILENT)) + puts("- check record delete-chain"); + + next_link=info->s->state.dellink; + if (info->state->del == 0) + { + if (test_flag & T_VERBOSE) + { + puts("No recordlinks"); + } + } + else + { + if (test_flag & T_VERBOSE) + printf("Recordlinks: "); + empty=0; + for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (test_flag & T_VERBOSE) + printf(" %9s",llstr(next_link,buff)); + if (next_link >= info->state->data_file_length) + goto wrong; + if (my_pread(info->dfile,(char*) buff,delete_link_length, + next_link,MYF(MY_NABP))) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Can't read delete-link at filepos: %s", + llstr(next_link,buff)); + DBUG_RETURN(1); + } + if (*buff != '\0') + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Record at pos: %s is not remove-marked", + llstr(next_link,buff)); + goto wrong; + } + if (info->s->options & HA_OPTION_PACK_RECORD) + { + my_off_t prev_link=mi_sizekorr(buff+12); + if (empty && prev_link != old_link) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2)); + goto wrong; + } + old_link=next_link; + next_link=mi_sizekorr(buff+4); + empty+=mi_uint3korr(buff+1); + } + else + { + param->record_checksum+=(ha_checksum) next_link; + next_link= _ma_rec_pos(info->s,(uchar*) buff+1); + empty+=info->s->base.pack_reclength; + } + } + if (test_flag & T_VERBOSE) + puts("\n"); + if (empty != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space in delete link chain. Should be %s", + llstr(empty,buff2), + llstr(info->state->empty,buff)); + } + if (next_link != HA_OFFSET_ERROR) + { + _ma_check_print_error(param, + "Found more than the expected %s deleted rows in delete link chain", + llstr(info->state->del, buff)); + goto wrong; + } + if (i != 0) + { + _ma_check_print_error(param, + "Found %s deleted rows in delete link chain. Should be %s", + llstr(info->state->del - i, buff2), + llstr(info->state->del, buff)); + goto wrong; + } + } + DBUG_RETURN(0); + +wrong: + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"record delete-link-chain corrupted"); + DBUG_RETURN(1); +} /* maria_chk_del */ + + + /* Check delete links in index file */ + +static int check_k_link(HA_CHECK *param, register MARIA_HA *info, uint nr) +{ + my_off_t next_link; + uint block_size=(nr+1)*MARIA_MIN_KEY_BLOCK_LENGTH; + ha_rows records; + char llbuff[21],*buff; + DBUG_ENTER("check_k_link"); + + if (param->testflag & T_VERBOSE) + printf("block_size %4d:",block_size); + + next_link=info->s->state.key_del[nr]; + records= (ha_rows) (info->state->key_file_length / block_size); + while (next_link != HA_OFFSET_ERROR && records > 0) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (param->testflag & T_VERBOSE) + printf("%16s",llstr(next_link,llbuff)); + if (next_link > info->state->key_file_length || + next_link & (info->s->blocksize-1)) + DBUG_RETURN(1); + if (!(buff=key_cache_read(info->s->key_cache, + info->s->kfile, next_link, DFLT_INIT_HITS, + (byte*) info->buff, + maria_block_size, block_size, 1))) + DBUG_RETURN(1); + next_link=mi_sizekorr(buff); + records--; + param->key_file_blocks+=block_size; + } + if (param->testflag & T_VERBOSE) + { + if (next_link != HA_OFFSET_ERROR) + printf("%16s\n",llstr(next_link,llbuff)); + else + puts(""); + } + DBUG_RETURN (next_link != HA_OFFSET_ERROR); +} /* check_k_link */ + + + /* Check sizes of files */ + +int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) +{ + int error=0; + register my_off_t skr,size; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_size"); + + if (!(param->testflag & T_SILENT)) puts("- check file-size"); + + /* The following is needed if called externally (not from mariachk) */ + flush_key_blocks(info->s->key_cache, + info->s->kfile, FLUSH_FORCE_WRITE); + + size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); + if ((skr=(my_off_t) info->state->key_file_length) != size) + { + /* Don't give error if file generated by mariapack */ + if (skr > size && maria_is_any_key_active(info->s->state.key_map)) + { + error=1; + _ma_check_print_error(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + else + _ma_check_print_warning(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + if (!(param->testflag & T_VERY_SILENT) && + ! (info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->key_file_length) > + ulonglong2double(info->s->base.margin_key_file_length)*0.9) + _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used", + llstr(info->state->key_file_length,buff), + llstr(info->s->base.max_key_file_length-1,buff)); + + size=my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)); + skr=(my_off_t) info->state->data_file_length; + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + skr+= MEMMAP_EXTRA_MARGIN; +#ifdef USE_RELOC + if (info->data_file_type == STATIC_RECORD && + skr < (my_off_t) info->s->base.reloc*info->s->base.min_pack_length) + skr=(my_off_t) info->s->base.reloc*info->s->base.min_pack_length; +#endif + if (skr != size) + { + info->state->data_file_length=size; /* Skip other errors */ + if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN) + { + error=1; + _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + } + else + { + _ma_check_print_warning(param, + "Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + } + if (!(param->testflag & T_VERY_SILENT) && + !(info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->data_file_length) > + (ulonglong2double(info->s->base.max_data_file_length)*0.9)) + _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used", + llstr(info->state->data_file_length,buff), + llstr(info->s->base.max_data_file_length-1,buff2)); + DBUG_RETURN(error); +} /* maria_chk_size */ + + + /* Check keys */ + +int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) +{ + uint key,found_keys=0,full_text_keys=0,result=0; + ha_rows keys; + ha_checksum old_record_checksum,init_checksum; + my_off_t all_keydata,all_totaldata,key_totlength,length; + ulong *rec_per_key_part; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_key"); + + if (!(param->testflag & T_SILENT)) + puts("- check key delete-chain"); + + param->key_file_blocks=info->s->base.keystart; + for (key=0 ; key < info->s->state.header.max_block_size ; key++) + if (check_k_link(param,info,key)) + { + if (param->testflag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"key delete-link-chain corrupted"); + DBUG_RETURN(-1); + } + + if (!(param->testflag & T_SILENT)) puts("- check index reference"); + + all_keydata=all_totaldata=key_totlength=0; + old_record_checksum=0; + init_checksum=param->record_checksum; + if (!(share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + old_record_checksum=calc_checksum(info->state->records+info->state->del-1)* + share->base.pack_reclength; + rec_per_key_part= param->rec_per_key_part; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++) + { + param->key_crc[key]=0; + if (! maria_is_key_active(share->state.key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + found_keys++; + + param->record_checksum=init_checksum; + + bzero((char*) ¶m->unique_count,sizeof(param->unique_count)); + bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count)); + + if ((!(param->testflag & T_SILENT))) + printf ("- check data record references index: %d\n",key+1); + if (keyinfo->flag & HA_FULLTEXT) + full_text_keys++; + if (share->state.key_root[key] == HA_OFFSET_ERROR && + (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT)) + goto do_stat; + if (!_ma_fetch_keypage(info,keyinfo,share->state.key_root[key], + DFLT_INIT_HITS,info->buff,0)) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + llstr(share->state.key_root[key],buff)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + param->key_file_blocks+=keyinfo->block_length; + keys=0; + param->keydata=param->totaldata=0; + param->key_blocks=0; + param->max_level=0; + if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff, + &keys, param->key_crc+key,1)) + DBUG_RETURN(-1); + if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))) + { + if (keys != info->state->records) + { + _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff), + llstr(info->state->records,buff2)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + if (found_keys - full_text_keys == 1 && + ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) || + (param->testflag & T_DONT_CHECK_CHECKSUM))) + old_record_checksum=param->record_checksum; + else if (old_record_checksum != param->record_checksum) + { + if (key) + _ma_check_print_error(param,"Key %u doesn't point at same records that key 1", + key+1); + else + _ma_check_print_error(param,"Key 1 doesn't point at all records"); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + } + if ((uint) share->base.auto_key -1 == key) + { + /* Check that auto_increment key is bigger than max key value */ + ulonglong save_auto_value=info->s->state.auto_increment; + info->s->state.auto_increment=0; + info->lastinx=key; + _ma_read_key_record(info, 0L, info->rec_buff); + _ma_update_auto_increment(info, info->rec_buff); + if (info->s->state.auto_increment > save_auto_value) + { + _ma_check_print_warning(param, + "Auto-increment value: %s is smaller than max used value: %s", + llstr(save_auto_value,buff2), + llstr(info->s->state.auto_increment, buff)); + } + if (param->testflag & T_AUTO_INC) + { + set_if_bigger(info->s->state.auto_increment, + param->auto_increment_value); + } + else + info->s->state.auto_increment=save_auto_value; + + /* Check that there isn't a row with auto_increment = 0 in the table */ + maria_extra(info,HA_EXTRA_KEYREAD,0); + bzero(info->lastkey,keyinfo->seg->length); + if (!maria_rkey(info, info->rec_buff, key, (const byte*) info->lastkey, + keyinfo->seg->length, HA_READ_KEY_EXACT)) + { + /* Don't count this as a real warning, as mariachk can't correct it */ + uint save=param->warning_printed; + _ma_check_print_warning(param, + "Found row where the auto_increment column has the value 0"); + param->warning_printed=save; + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + } + + length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2; + if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L) + printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n", + key+1, + (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)), + (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/ + my_off_t2double(length)), + param->max_level); + all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length; + +do_stat: + if (param->testflag & T_STATISTICS) + maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + param->notnull_count: NULL, + (ulonglong)info->state->records); + } + if (param->testflag & T_INFO) + { + if (all_totaldata != 0L && found_keys > 0) + printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n", + (int) (my_off_t2double(all_keydata)*100.0/ + my_off_t2double(all_totaldata)), + (int) ((my_off_t2double(key_totlength) - + my_off_t2double(all_keydata))*100.0/ + my_off_t2double(key_totlength))); + else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map)) + puts(""); + } + if (param->key_file_blocks != info->state->key_file_length && + param->keys_in_use != ~(ulonglong) 0) + _ma_check_print_warning(param, "Some data are unreferenced in keyfile"); + if (found_keys != full_text_keys) + param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */ + else + param->record_checksum=0; + DBUG_RETURN(result); +} /* maria_chk_key */ + + +static int chk_index_down(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, uchar *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + char llbuff[22],llbuff2[22]; + if (page > info->state->key_file_length || (page & (info->s->blocksize -1))) + { + my_off_t max_length=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); + _ma_check_print_error(param,"Wrong pagepointer: %s at page: %s", + llstr(page,llbuff),llstr(page,llbuff2)); + + if (page+info->s->blocksize > max_length) + goto err; + info->state->key_file_length=(max_length & + ~ (my_off_t) (info->s->blocksize-1)); + } + if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key from filepos: %s", + llstr(page,llbuff)); + goto err; + } + param->key_file_blocks+=keyinfo->block_length; + if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level)) + goto err; + + return 0; +err: + return 1; +} + + +/* + "Ignore NULLs" statistics collection method: process first index tuple. + + SYNOPSIS + maria_collect_stats_nonulls_first() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + key IN Key values tuple + + DESCRIPTION + Process the first index tuple - find out which prefix tuples don't + contain NULLs, and update the array of notnull counters accordingly. +*/ + +static +void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *key) +{ + uint first_null, kp; + first_null= ha_find_null(keyseg, key) - keyseg; + /* + All prefix tuples that don't include keypart_{first_null} are not-null + tuples (and all others aren't), increment counters for them. + */ + for (kp= 0; kp < first_null; kp++) + notnull[kp]++; +} + + +/* + "Ignore NULLs" statistics collection method: process next index tuple. + + SYNOPSIS + maria_collect_stats_nonulls_next() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + prev_key IN Previous key values tuple + last_key IN Next key values tuple + + DESCRIPTION + Process the next index tuple: + 1. Find out which prefix tuples of last_key don't contain NULLs, and + update the array of notnull counters accordingly. + 2. Find the first keypart number where the prev_key and last_key tuples + are different(A), or last_key has NULL value(B), and return it, so the + caller can count number of unique tuples for each key prefix. We don't + need (B) to be counted, and that is compensated back in + maria_update_key_parts(). + + RETURN + 1 + number of first keypart where values differ or last_key tuple has NULL +*/ + +static +int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, + uchar *prev_key, uchar *last_key) +{ + uint diffs[2]; + uint first_null_seg, kp; + HA_KEYSEG *seg; + + /* + Find the first keypart where values are different or either of them is + NULL. We get results in diffs array: + diffs[0]= 1 + number of first different keypart + diffs[1]=offset: (last_key + diffs[1]) points to first value in + last_key that is NULL or different from corresponding + value in prev_key. + */ + ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs); + seg= keyseg + diffs[0] - 1; + + /* Find first NULL in last_key */ + first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; + for (kp= 0; kp < first_null_seg; kp++) + notnull[kp]++; + + /* + Return 1+ number of first key part where values differ. Don't care if + these were NULLs and not .... We compensate for that in + maria_update_key_parts. + */ + return diffs[0]; +} + + + /* Check if index is ok */ + +static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, uchar *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + int flag; + uint used_length,comp_flag,nod_flag,key_length=0; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + my_off_t next_page,record; + char llbuff[22]; + uint diff_pos[2]; + DBUG_ENTER("chk_index"); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); + + /* TODO: implement appropriate check for RTree keys */ + if (keyinfo->flag & HA_SPATIAL) + DBUG_RETURN(0); + + if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for keyblock"); + DBUG_RETURN(-1); + } + + if (keyinfo->flag & HA_NOSAME) + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* Not real duplicates */ + else + comp_flag=SEARCH_SAME; /* Keys in positionorder */ + nod_flag=_ma_test_if_nod(buff); + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + + param->keydata+=used_length; param->totaldata+=keyinfo->block_length; /* INFO */ + param->key_blocks++; + if (level > param->max_level) + param->max_level=level; + + if (used_length > keyinfo->block_length) + { + _ma_check_print_error(param,"Wrong pageinfo at page: %s", + llstr(page,llbuff)); + goto err; + } + for ( ;; ) + { + if (*_ma_killed_ptr(param)) + goto err; + memcpy((char*) info->lastkey,(char*) key,key_length); + info->lastkey_length=key_length; + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + if (chk_index_down(param,info,keyinfo,next_page, + temp_buff,keys,key_checksum,level+1)) + goto err; + } + old_keypos=keypos; + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + if (keypos > endpos) + { + _ma_check_print_error(param,"Wrong key block length at page: %s",llstr(page,llbuff)); + goto err; + } + if ((*keys)++ && + (flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length, + comp_flag, diff_pos)) >=0) + { + DBUG_DUMP("old",(byte*) info->lastkey, info->lastkey_length); + DBUG_DUMP("new",(byte*) key, key_length); + DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + + if (comp_flag & SEARCH_FIND && flag == 0) + _ma_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff)); + else + _ma_check_print_error(param,"Key in wrong position at page %s",llstr(page,llbuff)); + goto err; + } + if (param->testflag & T_STATISTICS) + { + if (*keys != 1L) /* not first_key */ + { + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, + diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg, + param->notnull_count, + info->lastkey, key); + } + param->unique_count[diff_pos[0]-1]++; + } + else + { + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, + key); + } + } + (*key_checksum)+= maria_byte_checksum((byte*) key, + key_length- info->s->rec_reflength); + record= _ma_dpos(info,0,key+key_length); + if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 0) + { + ha_rows tmp_keys=0; + if (chk_index_down(param,info,&info->s->ft2_keyinfo,record, + temp_buff,&tmp_keys,key_checksum,1)) + goto err; + if (tmp_keys + subkeys) + { + _ma_check_print_error(param, + "Number of words in the 2nd level tree " + "does not match the number in the header. " + "Parent word in on the page %s, offset %u", + llstr(page,llbuff), (uint) (old_keypos-buff)); + goto err; + } + (*keys)+=tmp_keys-1; + continue; + } + /* fall through */ + } + if (record >= info->state->data_file_length) + { +#ifndef DBUG_OFF + char llbuff2[22], llbuff3[22]; +#endif + _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff)); + DBUG_PRINT("test",("page: %s record: %s filelength: %s", + llstr(page,llbuff),llstr(record,llbuff2), + llstr(info->state->data_file_length,llbuff3))); + DBUG_DUMP("key",(byte*) key,key_length); + DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + goto err; + } + param->record_checksum+=(ha_checksum) record; + } + if (keypos != endpos) + { + _ma_check_print_error(param,"Keyblock size at page %s is not correct. Block length: %d key length: %d", + llstr(page,llbuff), used_length, (keypos - buff)); + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(0); + err: + my_afree((byte*) temp_buff); + DBUG_RETURN(1); +} /* chk_index */ + + + /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */ + +static ha_checksum calc_checksum(ha_rows count) +{ + ulonglong sum,a,b; + DBUG_ENTER("calc_checksum"); + + sum=0; + a=count; b=count+1; + if (a & 1) + b>>=1; + else + a>>=1; + while (b) + { + if (b & 1) + sum+=a; + a<<=1; b>>=1; + } + DBUG_PRINT("exit",("sum: %lx",(ulong) sum)); + DBUG_RETURN((ha_checksum) sum); +} /* calc_checksum */ + + + /* Calc length of key in normal isam */ + +static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo) +{ + uint length; + HA_KEYSEG *keyseg; + DBUG_ENTER("isam_key_length"); + + length= info->s->rec_reflength; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + length+= keyseg->length; + + DBUG_PRINT("exit",("length: %d",length)); + DBUG_RETURN(length); +} /* key_length */ + + + /* Check that record-link is ok */ + +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) +{ + int error,got_error,flag; + uint key,left_length,b_type,field; + ha_rows records,del_blocks; + my_off_t used,empty,pos,splits,start_recpos, + del_length,link_used,start_block; + byte *record,*to; + char llbuff[22],llbuff2[22],llbuff3[22]; + ha_checksum intern_record_checksum; + ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; + my_bool static_row_size; + MARIA_KEYDEF *keyinfo; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("maria_chk_data_link"); + + if (!(param->testflag & T_SILENT)) + { + if (extend) + puts("- check records and index references"); + else + puts("- check record links"); + } + + if (!(record= (byte*) my_malloc(info->s->base.pack_reclength,MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + DBUG_RETURN(-1); + } + records=del_blocks=0; + used=link_used=splits=del_length=0; + intern_record_checksum=param->glob_crc=0; + LINT_INIT(left_length); LINT_INIT(start_recpos); LINT_INIT(to); + got_error=error=0; + empty=info->s->pack.header_length; + + /* Check how to calculate checksum of rows */ + static_row_size=1; + if (info->s->data_file_type == COMPRESSED_RECORD) + { + for (field=0 ; field < info->s->base.fields ; field++) + { + if (info->s->rec[field].base_type == FIELD_BLOB || + info->s->rec[field].base_type == FIELD_VARCHAR) + { + static_row_size=0; + break; + } + } + } + + pos=my_b_tell(¶m->read_cache); + bzero((char*) key_checksum, info->s->base.keys * sizeof(key_checksum[0])); + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + goto err2; + switch (info->s->data_file_type) { + case STATIC_RECORD: + if (my_b_read(¶m->read_cache,(byte*) record, + info->s->base.pack_reclength)) + goto err; + start_recpos=pos; + pos+=info->s->base.pack_reclength; + splits++; + if (*record == '\0') + { + del_blocks++; + del_length+=info->s->base.pack_reclength; + continue; /* Record removed */ + } + param->glob_crc+= _ma_static_checksum(info,record); + used+=info->s->base.pack_reclength; + break; + case DYNAMIC_RECORD: + flag=block_info.second_read=0; + block_info.next_filepos=pos; + do + { + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, + (start_block=block_info.next_filepos), + sizeof(block_info.header), + (flag ? 0 : READING_NEXT) | READING_HEADER)) + goto err; + if (start_block & (MARIA_DYN_ALIGN_SIZE-1)) + { + _ma_check_print_error(param,"Wrong aligned block at %s", + llstr(start_block,llbuff)); + goto err2; + } + b_type= _ma_get_block_info(&block_info,-1,start_block); + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & BLOCK_SYNC_ERROR) + { + if (flag) + { + _ma_check_print_error(param,"Unexpected byte: %d at link: %s", + (int) block_info.header[0], + llstr(start_block,llbuff)); + goto err2; + } + pos=block_info.filepos+block_info.block_len; + goto next; + } + if (b_type & BLOCK_DELETED) + { + if (block_info.block_len < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Deleted block with impossible length %lu at %s", + block_info.block_len,llstr(pos,llbuff)); + goto err2; + } + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + _ma_check_print_error(param,"Delete link points outside datafile at %s", + llstr(pos,llbuff)); + goto err2; + } + del_blocks++; + del_length+=block_info.block_len; + pos=block_info.filepos+block_info.block_len; + splits++; + goto next; + } + _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", + block_info.header[0],block_info.header[1], + block_info.header[2], + llstr(start_block,llbuff)); + goto err2; + } + if (info->state->data_file_length < block_info.filepos+ + block_info.block_len) + { + _ma_check_print_error(param, + "Recordlink that points outside datafile at %s", + llstr(pos,llbuff)); + got_error=1; + break; + } + splits++; + if (!flag++) /* First block */ + { + start_recpos=pos; + pos=block_info.filepos+block_info.block_len; + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (info->s->base.blobs) + { + if (!(to= _ma_alloc_rec_buff(info, block_info.rec_len, + &info->rec_buff))) + { + _ma_check_print_error(param, + "Not enough memory (%lu) for blob at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + } + else + to= info->rec_buff; + left_length=block_info.rec_len; + } + if (left_length < block_info.data_len) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.data_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (_ma_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, + (uint) block_info.data_len, + flag == 1 ? READING_NEXT : 0)) + goto err; + to+=block_info.data_len; + link_used+= block_info.filepos-start_block; + used+= block_info.filepos - start_block + block_info.data_len; + empty+=block_info.block_len-block_info.data_len; + left_length-=block_info.data_len; + if (left_length) + { + if (b_type & BLOCK_LAST) + { + _ma_check_print_error(param, + "Wrong record length %s of %s at %s", + llstr(block_info.rec_len-left_length,llbuff), + llstr(block_info.rec_len, llbuff2), + llstr(start_recpos,llbuff3)); + got_error=1; + break; + } + if (info->state->data_file_length < block_info.next_filepos) + { + _ma_check_print_error(param, + "Found next-recordlink that points outside datafile at %s", + llstr(block_info.filepos,llbuff)); + got_error=1; + break; + } + } + } while (left_length); + if (! got_error) + { + if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) == + MY_FILE_ERROR) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + else + { + info->checksum=_ma_checksum(info,record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) + { + if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, + test(info->s->calc_checksum))) + { + _ma_check_print_error(param,"Found wrong packed record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + } + if (!got_error) + param->glob_crc+= info->checksum; + } + } + else if (!flag) + pos=block_info.filepos+block_info.block_len; + break; + case COMPRESSED_RECORD: + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, pos, + info->s->pack.ref_length, READING_NEXT)) + goto err; + start_recpos=pos; + splits++; + VOID(_ma_pack_get_block_info(info,&block_info, -1, start_recpos)); + pos=block_info.filepos+block_info.rec_len; + if (block_info.rec_len < (uint) info->s->min_pack_length || + block_info.rec_len > (uint) info->s->max_pack_length) + { + _ma_check_print_error(param, + "Found block with wrong recordlength: %d at %s", + block_info.rec_len, llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, READING_NEXT)) + goto err; + if (_ma_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + if (static_row_size) + param->glob_crc+= _ma_static_checksum(info,record); + else + param->glob_crc+= _ma_checksum(info,record); + link_used+= (block_info.filepos - start_recpos); + used+= (pos-start_recpos); + } /* switch */ + if (! got_error) + { + intern_record_checksum+=(ha_checksum) start_recpos; + records++; + if (param->testflag & T_WRITE_LOOP && records % WRITE_COUNT == 0) + { + printf("%s\r", llstr(records,llbuff)); VOID(fflush(stdout)); + } + + /* Check if keys match the record */ + + for (key=0,keyinfo= info->s->keyinfo; key < info->s->base.keys; + key++,keyinfo++) + { + if (maria_is_key_active(info->s->state.key_map, key)) + { + if(!(keyinfo->flag & HA_FULLTEXT)) + { + uint key_length= _ma_make_key(info,key,info->lastkey,record, + start_recpos); + if (extend) + { + /* We don't need to lock the key tree here as we don't allow + concurrent threads when running mariachk + */ + int search_result= +#ifdef HAVE_RTREE_KEYS + (keyinfo->flag & HA_SPATIAL) ? + maria_rtree_find_first(info, key, info->lastkey, key_length, + SEARCH_SAME) : +#endif + _ma_search(info,keyinfo,info->lastkey,key_length, + SEARCH_SAME, info->s->state.key_root[key]); + if (search_result) + { + _ma_check_print_error(param,"Record at: %10s Can't find key for index: %2d", + llstr(start_recpos,llbuff),key+1); + if (error++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err2; + } + } + else + key_checksum[key]+=maria_byte_checksum((byte*) info->lastkey, + key_length); + } + } + } + } + else + { + got_error=0; + if (error++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err2; + } + next:; /* Next record */ + } + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (records != info->state->records) + { + _ma_check_print_error(param,"Record-count is not ok; is %-10s Should be: %s", + llstr(records,llbuff), llstr(info->state->records,llbuff2)); + error=1; + } + else if (param->record_checksum && + param->record_checksum != intern_record_checksum) + { + _ma_check_print_error(param, + "Keypointers and record positions doesn't match"); + error=1; + } + else if (param->glob_crc != info->state->checksum && + (info->s->options & + (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))) + { + _ma_check_print_warning(param, + "Record checksum is not the same as checksum stored in the index file\n"); + error=1; + } + else if (!extend) + { + for (key=0 ; key < info->s->base.keys; key++) + { + if (key_checksum[key] != param->key_crc[key] && + !(info->s->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL))) + { + _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records", + key+1); + error=1; + } + } + } + + if (del_length != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space. Should be %s", + llstr(del_length,llbuff2), + llstr(info->state->empty,llbuff)); + } + if (used+empty+del_length != info->state->data_file_length) + { + _ma_check_print_warning(param, + "Found %s record-data and %s unused data and %s deleted-data", + llstr(used,llbuff),llstr(empty,llbuff2), + llstr(del_length,llbuff3)); + _ma_check_print_warning(param, + "Total %s, Should be: %s", + llstr((used+empty+del_length),llbuff), + llstr(info->state->data_file_length,llbuff2)); + } + if (del_blocks != info->state->del) + { + _ma_check_print_warning(param, + "Found %10s deleted blocks Should be: %s", + llstr(del_blocks,llbuff), + llstr(info->state->del,llbuff2)); + } + if (splits != info->s->state.split) + { + _ma_check_print_warning(param, + "Found %10s parts Should be: %s parts", + llstr(splits,llbuff), + llstr(info->s->state.split,llbuff2)); + } + if (param->testflag & T_INFO) + { + if (param->warning_printed || param->error_printed) + puts(""); + if (used != 0 && ! param->error_printed) + { + printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n", + llstr(records,llbuff), (long)((used-link_used)/records), + (info->s->base.blobs ? 0.0 : + (ulonglong2double((ulonglong) info->s->base.reclength*records)- + my_off_t2double(used))/ + ulonglong2double((ulonglong) info->s->base.reclength*records)*100.0)); + printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n", + (ulonglong2double(used-link_used)/ulonglong2double(used-link_used+empty)*100.0), + (!records ? 100 : (int) (ulonglong2double(del_length+empty)/ + my_off_t2double(used)*100.0)), + ulonglong2double(splits - del_blocks) / records); + } + printf("Record blocks:%12s Delete blocks:%10s\n", + llstr(splits-del_blocks,llbuff),llstr(del_blocks,llbuff2)); + printf("Record data: %12s Deleted data: %10s\n", + llstr(used-link_used,llbuff),llstr(del_length,llbuff2)); + printf("Lost space: %12s Linkdata: %10s\n", + llstr(empty,llbuff),llstr(link_used,llbuff2)); + } + my_free((gptr) record,MYF(0)); + DBUG_RETURN (error); + err: + _ma_check_print_error(param,"got error: %d when reading datafile at record: %s",my_errno, llstr(records,llbuff)); + err2: + my_free((gptr) record,MYF(0)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); +} /* maria_chk_data_link */ + + + /* Recover old table by reading each record and writing all keys */ + /* Save new datafile-name in temp_filename */ + +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick) +{ + int error,got_error; + uint i; + ha_rows start_records,new_header_length; + my_off_t del; + File new_file; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("maria_repair"); + + bzero((char *)&sort_info, sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; + new_header_length=(param->testflag & T_UNPACK) ? 0L : + share->pack.header_length; + got_error=1; + new_file= -1; + sort_param.sort_info=&sort_info; + + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with keycache) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + if (!param->using_global_keycache) + VOID(init_key_cache(maria_key_cache, param->key_cache_block_size, + param->use_buffers, 0, 0)); + + if (init_io_cache(¶m->read_cache,info->dfile, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) + { + bzero(&info->rec_cache,sizeof(info->rec_cache)); + goto err; + } + if (!rep_quick) + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + !_ma_alloc_rec_buff(info, -1, &sort_param.rec_buff)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file=my_raid_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (maria_filecopy(param,new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + info->s->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + if (param->testflag & T_UNPACK) + { + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + } + } + sort_info.info=info; + sort_info.param = param; + sort_param.read_cache=param->read_cache; + sort_param.pos=sort_param.max_pos=share->pack.header_length; + sort_param.filepos=new_header_length; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)); + sort_info.dupp=0; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master=1; + sort_info.max_records= ~(ha_rows) 0; + + set_data_file_type(&sort_info, share); + del=info->state->del; + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + param->calc_checksum=1; + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* + Clear all keys. Note that all key blocks allocated until now remain + "dead" parts of the key file. (Bug #4692) + */ + for (i=0 ; i < info->s->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + + /* Drop the delete chain. */ + for (i=0 ; i < share->state.header.max_block_size ; i++) + share->state.key_del[i]= HA_OFFSET_ERROR; + + /* + If requested, activate (enable) all keys in key_map. In this case, + all indexes will be (re-)built. + */ + if (param->testflag & T_CREATE_MISSING_KEYS) + maria_set_all_keys_active(share->state.key_map, share->base.keys); + + info->state->key_file_length=share->base.keystart; + + maria_lock_memory(param); /* Everything is alloced */ + + /* Re-create all keys, which are set in key_map. */ + while (!(error=sort_get_next_record(&sort_param))) + { + if (writekeys(param,info,(byte*)sort_param.record,sort_param.filepos)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + goto err; + DBUG_DUMP("record",(byte*) sort_param.record,share->base.pack_reclength); + _ma_check_print_info(param,"Duplicate key %2d for record at %10s against new record at %10s", + info->errkey+1, + llstr(sort_param.start_recpos,llbuff), + llstr(info->dupp_key_pos,llbuff2)); + if (param->testflag & T_VERBOSE) + { + VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey, + sort_param.record,0L)); + _ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey, + USE_WHOLE_KEY); + } + sort_info.dupp++; + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + param->testflag|=T_RETRY_WITHOUT_QUICK; + param->error_printed=1; + goto err; + } + continue; + } + if (_ma_sort_write_record(&sort_param)) + goto err; + } + if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || + flush_io_cache(&info->rec_cache) || param->read_cache.error < 0) + goto err; + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + { + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + goto err; + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + got_error=1; + goto err; + } + } + + if (!rep_quick) + { + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + info->state->data_file_length=sort_param.filepos; + share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + } + else + { + info->state->data_file_length=sort_param.max_pos; + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + + got_error=0; + /* If invoked by external program that uses thr_lock */ + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, share->base.raid_chunks, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d for record at pos %s",my_errno, + llstr(sort_param.start_recpos,llbuff)); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_raid_delete(param->temp_filename,info->s->base.raid_chunks, + MYF(MY_WME))); + info->rec_cache.file=-1; /* don't flush data to new_file, it's closed */ + } + maria_mark_crashed_on_repair(info); + } + my_free(_ma_get_rec_buff_ptr(info, sort_param.rec_buff), + MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + got_error|=_ma_flush_blocks(param, share->key_cache, share->kfile); + if (!got_error && param->testflag & T_UNPACK) + { + share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; + share->pack.header_length=0; + share->data_file_type=sort_info.new_data_file_type; + } + share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES | + STATE_NOT_ANALYZED); + DBUG_RETURN(got_error); +} + + +/* Uppate keyfile when doing repair */ + +static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, + my_off_t filepos) +{ + register uint i; + uchar *key; + DBUG_ENTER("writekeys"); + + key=info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) key,buff,filepos)) + goto err; + } +#ifdef HAVE_SPATIAL + else if (info->s->keyinfo[i].flag & HA_SPATIAL) + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (maria_rtree_insert(info, i, key, key_length)) + goto err; + } +#endif /*HAVE_SPATIAL*/ + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_write(info,i,key,key_length)) + goto err; + } + } + } + DBUG_RETURN(0); + + err: + if (my_errno == HA_ERR_FOUND_DUPP_KEY) + { + info->errkey=(int) i; /* This key was found */ + while ( i-- > 0 ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) key,buff,filepos)) + break; + } + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_delete(info,i,key,key_length)) + break; + } + } + } + } + /* Remove checksum that was added to glob_crc in sort_get_next_record */ + if (param->calc_checksum) + param->glob_crc-= info->checksum; + DBUG_PRINT("error",("errno: %d",my_errno)); + DBUG_RETURN(-1); +} /* writekeys */ + + + /* Change all key-pointers that points to a records */ + +int maria_movepoint(register MARIA_HA *info, byte *record, my_off_t oldpos, + my_off_t newpos, uint prot_key) +{ + register uint i; + uchar *key; + uint key_length; + DBUG_ENTER("maria_movepoint"); + + key=info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys; i++) + { + if (i != prot_key && maria_is_key_active(info->s->state.key_map, i)) + { + key_length= _ma_make_key(info,i,key,record,oldpos); + if (info->s->keyinfo[i].flag & HA_NOSAME) + { /* Change pointer direct */ + uint nod_flag; + MARIA_KEYDEF *keyinfo; + keyinfo=info->s->keyinfo+i; + if (_ma_search(info,keyinfo,key,USE_WHOLE_KEY, + (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF), + info->s->state.key_root[i])) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(info->buff); + _ma_dpointer(info,info->int_keypos-nod_flag- + info->s->rec_reflength,newpos); + if (_ma_write_keypage(info,keyinfo,info->last_keypage, + DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + } + else + { /* Change old key to new */ + if (_ma_ck_delete(info,i,key,key_length)) + DBUG_RETURN(-1); + key_length= _ma_make_key(info,i,key,record,newpos); + if (_ma_ck_write(info,i,key,key_length)) + DBUG_RETURN(-1); + } + } + } + DBUG_RETURN(0); +} /* maria_movepoint */ + + + /* Tell system that we want all memory for our cache */ + +void maria_lock_memory(HA_CHECK *param __attribute__((unused))) +{ +#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ + if (param->opt_maria_lock_memory) + { + int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */ + if (geteuid() == 0 && success != 0) + _ma_check_print_warning(param, + "Failed to lock memory. errno %d",my_errno); + } +#endif +} /* maria_lock_memory */ + + + /* Flush all changed blocks to disk */ + +int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file) +{ + if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) + { + _ma_check_print_error(param,"%d when trying to write bufferts",my_errno); + return(1); + } + if (!param->using_global_keycache) + end_key_cache(key_cache,1); + return 0; +} /* _ma_flush_blocks */ + + + /* Sort index for more efficent reads */ + +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + reg2 uint key; + reg1 MARIA_KEYDEF *keyinfo; + File new_file; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; + uint r_locks,w_locks; + int old_lock; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO old_state; + DBUG_ENTER("maria_sort_index"); + + if (!(param->testflag & T_SILENT)) + printf("- Sorting index for MARIA-table '%s'\n",name); + + /* Get real path for index file */ + fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32); + if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename, + "", INDEX_TMP_EXT,2+4), + 0,param->tmpfile_createflag,MYF(0))) <= 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + DBUG_RETURN(-1); + } + if (maria_filecopy(param, new_file,share->kfile,0L, + (ulong) share->base.keystart, "headerblock")) + goto err; + + param->new_file_pos=share->base.keystart; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + key++,keyinfo++) + { + if (! maria_is_key_active(info->s->state.key_map, key)) + continue; + + if (share->state.key_root[key] != HA_OFFSET_ERROR) + { + index_pos[key]=param->new_file_pos; /* Write first block here */ + if (sort_one_index(param,info,keyinfo,share->state.key_root[key], + new_file)) + goto err; + } + else + index_pos[key]= HA_OFFSET_ERROR; /* No blocks */ + } + + /* Flush key cache for this file if we are calling this outside mariachk */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + + share->state.version=(ulong) time((time_t*) 0); + old_state= share->state; /* save state if not stored */ + r_locks= share->r_locks; + w_locks= share->w_locks; + old_lock= info->lock_type; + + /* Put same locks as old file */ + share->r_locks= share->w_locks= share->tot_locks= 0; + (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE); + VOID(my_close(share->kfile,MYF(MY_WME))); + share->kfile = -1; + VOID(my_close(new_file,MYF(MY_WME))); + if (maria_change_to_newfile(share->index_file_name,MARIA_NAME_IEXT,INDEX_TMP_EXT,0, + MYF(0)) || + _ma_open_keyfile(share)) + goto err2; + info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */ + _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */ + info->lock_type= old_lock; + share->r_locks= r_locks; + share->w_locks= w_locks; + share->tot_locks= r_locks+w_locks; + share->state= old_state; /* Restore old state */ + + info->state->key_file_length=param->new_file_pos; + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + for (key=0 ; key < info->s->base.keys ; key++) + info->s->state.key_root[key]=index_pos[key]; + for (key=0 ; key < info->s->state.header.max_block_size ; key++) + info->s->state.key_del[key]= HA_OFFSET_ERROR; + + info->s->state.changed&= ~STATE_NOT_SORTED_PAGES; + DBUG_RETURN(0); + +err: + VOID(my_close(new_file,MYF(MY_WME))); +err2: + VOID(my_delete(param->temp_filename,MYF(MY_WME))); + DBUG_RETURN(-1); +} /* maria_sort_index */ + + + /* Sort records recursive using one index */ + +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file) +{ + uint length,nod_flag,used_length, key_length; + uchar *buff,*keypos,*endpos; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF]; + my_off_t new_page_pos,next_page; + char llbuff[22]; + DBUG_ENTER("sort_one_index"); + + new_page_pos=param->new_file_pos; + param->new_file_pos+=keyinfo->block_length; + + if (!(buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key block from filepos: %s", + llstr(pagepos,llbuff)); + goto err; + } + if ((nod_flag=_ma_test_if_nod(buff)) || keyinfo->flag & HA_FULLTEXT) + { + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + /* Save new pos */ + _ma_kpointer(info,keypos-nod_flag,param->new_file_pos); + if (sort_one_index(param,info,keyinfo,next_page,new_file)) + { + DBUG_PRINT("error", + ("From page: %ld, keyoffset: %lu used_length: %d", + (ulong) pagepos, (ulong) (keypos - buff), + (int) used_length)); + DBUG_DUMP("buff",(byte*) buff,used_length); + goto err; + } + } + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + DBUG_ASSERT(keypos <= endpos); + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 0) + { + next_page= _ma_dpos(info,0,key+key_length); + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + param->new_file_pos); /* Save new pos */ + if (sort_one_index(param,info,&info->s->ft2_keyinfo, + next_page,new_file)) + goto err; + } + } + } + } + + /* Fill block with zero and write it to the new index file */ + length=maria_getint(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + if (my_pwrite(new_file,(byte*) buff,(uint) keyinfo->block_length, + new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL))) + { + _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno); + goto err; + } + my_afree((gptr) buff); + DBUG_RETURN(0); +err: + my_afree((gptr) buff); + DBUG_RETURN(1); +} /* sort_one_index */ + + + /* + Let temporary file replace old file. + This assumes that the new file was created in the same + directory as given by realpath(filename). + This will ensure that any symlinks that are used will still work. + Copy stats from old file to new file, deletes orignal and + changes new file name to old file name + */ + +int maria_change_to_newfile(const char * filename, const char * old_ext, + const char * new_ext, + uint raid_chunks __attribute__((unused)), + myf MyFlags) +{ + char old_filename[FN_REFLEN],new_filename[FN_REFLEN]; +#ifdef USE_RAID + if (raid_chunks) + return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4), + fn_format(new_filename,filename,"",new_ext,2+4), + raid_chunks, + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +#endif + /* Get real path to filename */ + (void) fn_format(old_filename,filename,"",old_ext,2+4+32); + return my_redel(old_filename, + fn_format(new_filename,old_filename,"",new_ext,2+4), + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +} /* maria_change_to_newfile */ + + + /* Locks a whole file */ + /* Gives an error-message if file can't be locked */ + +int maria_lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, + const char *filetype, const char *filename) +{ + if (my_lock(file,lock_type,start,F_TO_EOF, + param->testflag & T_WAIT_FOREVER ? MYF(MY_SEEK_NOT_DONE) : + MYF(MY_SEEK_NOT_DONE | MY_DONT_WAIT))) + { + _ma_check_print_error(param," %d when locking %s '%s'",my_errno,filetype,filename); + param->error_printed=2; /* Don't give that data is crashed */ + return 1; + } + return 0; +} /* maria_lock_file */ + + + /* Copy a block between two files */ + +int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start, + my_off_t length, const char *type) +{ + char tmp_buff[IO_SIZE],*buff; + ulong buff_length; + DBUG_ENTER("maria_filecopy"); + + buff_length=(ulong) min(param->write_buffer_length,length); + if (!(buff=my_malloc(buff_length,MYF(0)))) + { + buff=tmp_buff; buff_length=IO_SIZE; + } + + VOID(my_seek(from,start,MY_SEEK_SET,MYF(0))); + while (length > buff_length) + { + if (my_read(from,(byte*) buff,buff_length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,buff_length,param->myf_rw)) + goto err; + length-= buff_length; + } + if (my_read(from,(byte*) buff,(uint) length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,(uint) length,param->myf_rw)) + goto err; + if (buff != tmp_buff) + my_free(buff,MYF(0)); + DBUG_RETURN(0); +err: + if (buff != tmp_buff) + my_free(buff,MYF(0)); + _ma_check_print_error(param,"Can't copy %s to tempfile, error %d", + type,my_errno); + DBUG_RETURN(1); +} + + +/* + Repair table or given index using sorting + + SYNOPSIS + maria_repair_by_sort() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ + int got_error; + uint i; + ulong length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM sort_param; + MARIA_SHARE *share=info->s; + HA_KEYSEG *keyseg; + ulong *rec_per_key_part; + char llbuff[22]; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + DBUG_ENTER("maria_repair_by_sort"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + new_header_length=(param->testflag & T_UNPACK) ? 0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + if (!(sort_info.key_block= + alloc_key_blocks(param, + (uint) param->sort_key_blocks, + share->base.max_key_block_length)) + || init_io_cache(¶m->read_cache,info->dfile, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + (! rep_quick && + init_io_cache(&info->rec_cache,info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE,new_header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file=info->dfile; /* for sort_delete_record */ + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + !_ma_alloc_rec_buff(info, -1, &sort_param.rec_buff)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file=my_raid_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + { + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + } + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + mariachk + */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + for (i=0 ; i < share->state.header.max_block_size ; i++) + share->state.key_del[i]= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + set_data_file_type(&sort_info, share); + sort_param.filepos=new_header_length; + sort_info.dupp=0; + sort_info.buff=0; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); + + sort_param.wordlist=NULL; + + if (share->data_file_type == DYNAMIC_RECORD) + length=max(share->base.min_pack_length+1,share->base.min_block_length); + else if (share->data_file_type == COMPRESSED_RECORD) + length=share->base.min_block_length; + else + length=share->base.pack_reclength; + sort_info.max_records= + ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records : + (ha_rows) (sort_info.filelength/length+1)); + sort_param.key_cmp=sort_key_cmp; + sort_param.lock_in_memory=maria_lock_memory; + sort_param.tmpdir=param->tmpdir; + sort_param.sort_info=&sort_info; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master =1; + + del=info->state->del; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + param->calc_checksum=1; + + rec_per_key_part= param->rec_per_key_part; + for (sort_param.key=0 ; sort_param.key < share->base.keys ; + rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++) + { + sort_param.read_cache=param->read_cache; + sort_param.keyinfo=share->keyinfo+sort_param.key; + sort_param.seg=sort_param.keyinfo->seg; + if (! maria_is_key_active(key_map, sort_param.key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",sort_param.key+1); + sort_param.max_pos=sort_param.pos=share->pack.header_length; + keyseg=sort_param.seg; + bzero((char*) sort_param.unique,sizeof(sort_param.unique)); + sort_param.key_length=share->rec_reflength; + for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++) + { + sort_param.key_length+=keyseg[i].length; + if (keyseg[i].flag & HA_SPACE_PACK) + sort_param.key_length+=get_pack_length(keyseg[i].length); + if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param.key_length+=2 + test(keyseg[i].length >= 127); + if (keyseg[i].flag & HA_NULL_PART) + sort_param.key_length++; + } + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + + if (sort_param.keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param.keyinfo->seg->charset->mbmaxlen; + sort_info.max_records= + (ha_rows) (sort_info.filelength/ft_min_word_len+1); + + sort_param.key_read=sort_maria_ft_key_read; + sort_param.key_write=sort_maria_ft_key_write; + sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + } + else + { + sort_param.key_read=sort_key_read; + sort_param.key_write=sort_key_write; + } + + if (_ma_create_index_by_sort(&sort_param, + (my_bool) (!(param->testflag & T_VERBOSE)), + (uint) param->sort_buffer_length)) + { + param->retry_repair=1; + goto err; + } + param->calc_checksum=0; /* No need to calc glob_crc */ + + /* Set for next loop */ + sort_info.max_records= (ha_rows) info->state->records; + + if (param->testflag & T_STATISTICS) + maria_update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sort_param.notnull: NULL,(ulonglong) info->state->records); + maria_set_key_active(share->state.key_map, sort_param.key); + + if (sort_param.fix_datafile) + { + param->read_cache.end_of_file=sort_param.filepos; + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length = info->state->data_file_length= + sort_param.filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + sort_param.fix_datafile=0; + } + else + info->state->data_file_length=sort_param.max_pos; + + param->read_cache.file=info->dfile; /* re-init read cache */ + reinit_io_cache(¶m->read_cache,READ_CACHE,share->pack.header_length, + 1,1); + } + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? + MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength && !info->s->base.raid_type) + if (my_chsize(info->dfile,skr,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile); + VOID(end_io_cache(&info->rec_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, share->base.raid_chunks, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_raid_delete(param->temp_filename,share->base.raid_chunks, + MYF(MY_WME))); + if (info->dfile == new_file) + info->dfile= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|=STATE_NOT_SORTED_PAGES; + + my_free(_ma_get_rec_buff_ptr(info, sort_param.rec_buff), + MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + { + share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; + share->pack.header_length=0; + } + DBUG_RETURN(got_error); +} + +/* + Threaded repair of table using sorting + + SYNOPSIS + maria_repair_parallel() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + DESCRIPTION + Same as maria_repair_by_sort but do it multithreaded + Each key is handled by a separate thread. + TODO: make a number of threads a parameter + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ +#ifndef THREAD + return maria_repair_by_sort(param, info, name, rep_quick); +#else + int got_error; + uint i,key, total_key_length, istep; + ulong rec_length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM *sort_param=0; + MARIA_SHARE *share=info->s; + ulong *rec_per_key_part; + HA_KEYSEG *keyseg; + char llbuff[22]; + IO_CACHE_SHARE io_share; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + pthread_attr_t thr_attr; + DBUG_ENTER("maria_repair_parallel"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + new_header_length=(param->testflag & T_UNPACK) ? 0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- parallel recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + bzero((char*)&sort_info,sizeof(sort_info)); + if (!(sort_info.key_block= + alloc_key_blocks(param, + (uint) param->sort_key_blocks, + share->base.max_key_block_length)) + || init_io_cache(¶m->read_cache,info->dfile, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + (! rep_quick && + init_io_cache(&info->rec_cache,info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE,new_header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file=info->dfile; /* for sort_delete_record */ + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file=my_raid_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, + 2+4), + 0,param->tmpfile_createflag, + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + { + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + } + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + mariachk + */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + for (i=0 ; i < share->state.header.max_block_size ; i++) + share->state.key_del[i]= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + set_data_file_type(&sort_info, share); + sort_info.dupp=0; + sort_info.buff=0; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); + + if (share->data_file_type == DYNAMIC_RECORD) + rec_length=max(share->base.min_pack_length+1,share->base.min_block_length); + else if (share->data_file_type == COMPRESSED_RECORD) + rec_length=share->base.min_block_length; + else + rec_length=share->base.pack_reclength; + /* + +1 below is required hack for parallel repair mode. + The info->state->records value, that is compared later + to sort_info.max_records and cannot exceed it, is + increased in sort_key_write. In maria_repair_by_sort, sort_key_write + is called after sort_key_read, where the comparison is performed, + but in parallel mode master thread can call sort_key_write + before some other repair thread calls sort_key_read. + Furthermore I'm not even sure +1 would be enough. + May be sort_info.max_records shold be always set to max value in + parallel mode. + */ + sort_info.max_records= + ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records + 1: + (ha_rows) (sort_info.filelength/rec_length+1)); + + del=info->state->del; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + param->calc_checksum=1; + + if (!(sort_param=(MARIA_SORT_PARAM *) + my_malloc((uint) share->base.keys * + (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength), + MYF(MY_ZEROFILL)))) + { + _ma_check_print_error(param,"Not enough memory for key!"); + goto err; + } + total_key_length=0; + rec_per_key_part= param->rec_per_key_part; + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + + for (i=key=0, istep=1 ; key < share->base.keys ; + rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++) + { + sort_param[i].key=key; + sort_param[i].keyinfo=share->keyinfo+key; + sort_param[i].seg=sort_param[i].keyinfo->seg; + if (! maria_is_key_active(key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part+ + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part)); + istep=0; + continue; + } + istep=1; + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",key+1); + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + sort_param[i].key_read=sort_maria_ft_key_read; + sort_param[i].key_write=sort_maria_ft_key_write; + } + else + { + sort_param[i].key_read=sort_key_read; + sort_param[i].key_write=sort_key_write; + } + sort_param[i].key_cmp=sort_key_cmp; + sort_param[i].lock_in_memory=maria_lock_memory; + sort_param[i].tmpdir=param->tmpdir; + sort_param[i].sort_info=&sort_info; + sort_param[i].master=0; + sort_param[i].fix_datafile=0; + + sort_param[i].filepos=new_header_length; + sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; + + sort_param[i].record= (((char *)(sort_param+share->base.keys))+ + (share->base.pack_reclength * i)); + if (!_ma_alloc_rec_buff(info, -1, &sort_param[i].rec_buff)) + { + _ma_check_print_error(param,"Not enough memory!"); + goto err; + } + + sort_param[i].key_length=share->rec_reflength; + for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END; + keyseg++) + { + sort_param[i].key_length+=keyseg->length; + if (keyseg->flag & HA_SPACE_PACK) + sort_param[i].key_length+=get_pack_length(keyseg->length); + if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param[i].key_length+=2 + test(keyseg->length >= 127); + if (keyseg->flag & HA_NULL_PART) + sort_param[i].key_length++; + } + total_key_length+=sort_param[i].key_length; + + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param[i].keyinfo->seg->charset->mbmaxlen; + sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + } + } + sort_info.total_keys=i; + sort_param[0].master= 1; + sort_param[0].fix_datafile= (my_bool)(! rep_quick); + + sort_info.got_error=0; + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + pthread_mutex_lock(&sort_info.mutex); + + init_io_cache_share(¶m->read_cache, &io_share, i); + (void) pthread_attr_init(&thr_attr); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + + for (i=0 ; i < sort_info.total_keys ; i++) + { + sort_param[i].read_cache=param->read_cache; + /* + two approaches: the same amount of memory for each thread + or the memory for the same number of keys for each thread... + In the second one all the threads will fill their sort_buffers + (and call write_keys) at the same time, putting more stress on i/o. + */ + sort_param[i].sortbuff_size= +#ifndef USING_SECOND_APPROACH + param->sort_buffer_length/sort_info.total_keys; +#else + param->sort_buffer_length*sort_param[i].key_length/total_key_length; +#endif + if (pthread_create(&sort_param[i].thr, &thr_attr, + _ma_thr_find_all_keys, + (void *) (sort_param+i))) + { + _ma_check_print_error(param,"Cannot start a repair thread"); + remove_io_thread(¶m->read_cache); + sort_info.got_error=1; + } + else + sort_info.threads_running++; + } + (void) pthread_attr_destroy(&thr_attr); + + /* waiting for all threads to finish */ + while (sort_info.threads_running) + pthread_cond_wait(&sort_info.cond, &sort_info.mutex); + pthread_mutex_unlock(&sort_info.mutex); + + if ((got_error= _ma_thr_write_keys(sort_param))) + { + param->retry_repair=1; + goto err; + } + got_error=1; /* Assume the following may go wrong */ + + if (sort_param[0].fix_datafile) + { + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length= info->state->data_file_length= + sort_param->filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + } + else + info->state->data_file_length=sort_param->max_pos; + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? + MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength && !info->s->base.raid_type) + if (my_chsize(info->dfile,skr,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy(&share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile); + VOID(end_io_cache(&info->rec_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, share->base.raid_chunks, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_raid_delete(param->temp_filename,share->base.raid_chunks, + MYF(MY_WME))); + if (info->dfile == new_file) + info->dfile= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|=STATE_NOT_SORTED_PAGES; + + pthread_cond_destroy (&sort_info.cond); + pthread_mutex_destroy(&sort_info.mutex); + + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_param,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + { + share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; + share->pack.header_length=0; + } + DBUG_RETURN(got_error); +#endif /* THREAD */ +} + + /* Read next record and return next key */ + +static int sort_key_read(MARIA_SORT_PARAM *sort_param, void *key) +{ + int error; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + MARIA_HA *info=sort_info->info; + DBUG_ENTER("sort_key_read"); + + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (info->state->records == sort_info->max_records) + { + _ma_check_print_error(sort_info->param, + "Key %d - Found too many records; Can't continue", + sort_param->key+1); + DBUG_RETURN(1); + } + sort_param->real_key_length= + (info->s->rec_reflength+ + _ma_make_key(info, sort_param->key, (uchar*) key, + sort_param->record, sort_param->filepos)); +#ifdef HAVE_purify + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + DBUG_RETURN(_ma_sort_write_record(sort_param)); +} /* sort_key_read */ + +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) +{ + int error; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + MARIA_HA *info=sort_info->info; + FT_WORD *wptr=0; + DBUG_ENTER("sort_maria_ft_key_read"); + + if (!sort_param->wordlist) + { + for (;;) + { + my_free((char*) wptr, MYF(MY_ALLOW_ZERO_PTR)); + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record))) + DBUG_RETURN(1); + if (wptr->pos) + break; + error=_ma_sort_write_record(sort_param); + } + sort_param->wordptr=sort_param->wordlist=wptr; + } + else + { + error=0; + wptr=(FT_WORD*)(sort_param->wordptr); + } + + sort_param->real_key_length=(info->s->rec_reflength+ + _ma_ft_make_key(info, sort_param->key, + key, wptr++, sort_param->filepos)); +#ifdef HAVE_purify + if (sort_param->key_length > sort_param->real_key_length) + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + if (!wptr->pos) + { + my_free((char*) sort_param->wordlist, MYF(0)); + sort_param->wordlist=0; + error=_ma_sort_write_record(sort_param); + } + else + sort_param->wordptr=(void*)wptr; + + DBUG_RETURN(error); +} /* sort_maria_ft_key_read */ + + + /* Read next record from file using parameters in sort_info */ + /* Return -1 if end of file, 0 if ok and > 0 if error */ + +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) +{ + int searching; + uint found_record,b_type,left_length; + my_off_t pos; + byte *to; + MARIA_BLOCK_INFO block_info; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("sort_get_next_record"); + + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + + switch (share->data_file_type) { + case STATIC_RECORD: + for (;;) + { + if (my_b_read(&sort_param->read_cache,sort_param->record, + share->base.pack_reclength)) + { + if (sort_param->read_cache.error) + param->out_flag |= O_DATA_LOST; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(-1); + } + sort_param->start_recpos=sort_param->pos; + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); + if (*sort_param->record) + { + if (param->calc_checksum) + param->glob_crc+= (info->checksum= + _ma_static_checksum(info,sort_param->record)); + DBUG_RETURN(0); + } + if (!sort_param->fix_datafile && sort_param->master) + { + info->state->del++; + info->state->empty+=share->base.pack_reclength; + } + } + case DYNAMIC_RECORD: + LINT_INIT(to); + pos=sort_param->pos; + searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + for (;;) + { + found_record=block_info.second_read= 0; + left_length=1; + if (searching) + { + pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE); + param->testflag|=T_RETRY_WITHOUT_QUICK; + sort_param->start_recpos=pos; + } + do + { + if (pos > sort_param->max_pos) + sort_param->max_pos=pos; + if (pos & (MARIA_DYN_ALIGN_SIZE-1)) + { + if ((param->testflag & T_VERBOSE) || searching == 0) + _ma_check_print_info(param,"Wrong aligned block at %s", + llstr(pos,llbuff)); + if (searching) + goto try_next; + } + if (found_record && pos == param->search_after_block) + _ma_check_print_info(param,"Block: %s used by record at %s", + llstr(param->search_after_block,llbuff), + llstr(sort_param->start_recpos,llbuff2)); + if (_ma_read_cache(&sort_param->read_cache, + (byte*) block_info.header,pos, + MARIA_BLOCK_INFO_HEADER_LENGTH, + (! found_record ? READING_NEXT : 0) | + READING_HEADER)) + { + if (found_record) + { + _ma_check_print_info(param, + "Can't read whole record at %s (errno: %d)", + llstr(sort_param->start_recpos,llbuff),errno); + goto try_next; + } + DBUG_RETURN(-1); + } + if (searching && ! sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + b_type= _ma_get_block_info(&block_info,-1,pos); + if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) || + ((b_type & BLOCK_FIRST) && + (block_info.rec_len < (uint) share->base.min_pack_length || + block_info.rec_len > (uint) share->base.max_pack_length))) + { + uint i; + if (param->testflag & T_VERBOSE || searching == 0) + _ma_check_print_info(param, + "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped", + block_info.header[0],block_info.header[1], + block_info.header[2],llstr(pos,llbuff)); + if (found_record) + goto try_next; + block_info.second_read=0; + searching=1; + /* Search after block in read header string */ + for (i=MARIA_DYN_ALIGN_SIZE ; + i < MARIA_BLOCK_INFO_HEADER_LENGTH ; + i+= MARIA_DYN_ALIGN_SIZE) + if (block_info.header[i] >= 1 && + block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE) + break; + pos+=(ulong) i; + sort_param->start_recpos=pos; + continue; + } + if (b_type & BLOCK_DELETED) + { + bool error=0; + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length) + { + if (!searching) + _ma_check_print_info(param, + "Deleted block with impossible length %u at %s", + block_info.block_len,llstr(pos,llbuff)); + error=1; + } + else + { + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= + info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + if (!searching) + _ma_check_print_info(param, + "Delete link points outside datafile at %s", + llstr(pos,llbuff)); + error=1; + } + } + if (error) + { + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + else + { + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length || + block_info.block_len > (uint) share->base.max_pack_length+ + MARIA_SPLIT_LENGTH) + { + if (!searching) + _ma_check_print_info(param, + "Found block with impossible length %u at %s; Skipped", + block_info.block_len+ (uint) (block_info.filepos-pos), + llstr(pos,llbuff)); + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + if (!sort_param->fix_datafile && sort_param->master && + (b_type & BLOCK_DELETED)) + { + info->state->empty+=block_info.block_len; + info->state->del++; + share->state.split++; + } + if (found_record) + goto try_next; + if (searching) + { + pos+=MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + } + else + pos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; + } + + if (!sort_param->fix_datafile && sort_param->master) + share->state.split++; + if (! found_record++) + { + sort_param->find_length=left_length=block_info.rec_len; + sort_param->start_recpos=pos; + if (!sort_param->fix_datafile) + sort_param->filepos=sort_param->start_recpos; + if (sort_param->fix_datafile && (param->testflag & T_EXTEND)) + sort_param->pos=block_info.filepos+1; + else + sort_param->pos=block_info.filepos+block_info.block_len; + if (share->base.blobs) + { + if (!(to=_ma_alloc_rec_buff(info,block_info.rec_len, + &(sort_param->rec_buff)))) + { + if (param->max_record_length >= block_info.rec_len) + { + _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + DBUG_RETURN(1); + } + else + { + _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + goto try_next; + } + } + } + else + to= sort_param->rec_buff; + } + if (left_length < block_info.data_len || ! block_info.data_len) + { + _ma_check_print_info(param, + "Found block with too small length at %s; Skipped", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (block_info.filepos + block_info.data_len > + sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param, + "Found block that points outside data file at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (_ma_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? READING_NEXT : 0))) + { + _ma_check_print_info(param, + "Read error for block at: %s (error: %d); Skipped", + llstr(block_info.filepos,llbuff),my_errno); + goto try_next; + } + left_length-=block_info.data_len; + to+=block_info.data_len; + pos=block_info.next_filepos; + if (pos == HA_OFFSET_ERROR && left_length) + { + _ma_check_print_info(param,"Wrong block with wrong total length starting at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s", + llstr(pos,llbuff2), + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } while (left_length); + + if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff, + sort_param->find_length) != MY_FILE_ERROR) + { + if (sort_param->read_cache.error < 0) + DBUG_RETURN(1); + if (info->s->calc_checksum) + info->checksum=_ma_checksum(info,sort_param->record); + if ((param->testflag & (T_EXTEND | T_REP)) || searching) + { + if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, + sort_param->find_length, + (param->testflag & T_QUICK) && + test(info->s->calc_checksum))) + { + _ma_check_print_info(param,"Found wrong packed record at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } + if (param->calc_checksum) + param->glob_crc+= info->checksum; + DBUG_RETURN(0); + } + if (!searching) + _ma_check_print_info(param,"Key %d - Found wrong stored record at %s", + sort_param->key+1, + llstr(sort_param->start_recpos,llbuff)); + try_next: + pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE); + searching=1; + } + case COMPRESSED_RECORD: + for (searching=0 ;; searching=1, sort_param->pos++) + { + if (_ma_read_cache(&sort_param->read_cache,(byte*) block_info.header, + sort_param->pos, + share->pack.ref_length,READING_NEXT)) + DBUG_RETURN(-1); + if (searching && ! sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + sort_param->start_recpos=sort_param->pos; + if (_ma_pack_get_block_info(info,&block_info,-1,sort_param->pos)) + DBUG_RETURN(-1); + if (!block_info.rec_len && + sort_param->pos + MEMMAP_EXTRA_MARGIN == + sort_param->read_cache.end_of_file) + DBUG_RETURN(-1); + if (block_info.rec_len < (uint) share->min_pack_length || + block_info.rec_len > (uint) share->max_pack_length) + { + if (! searching) + _ma_check_print_info(param,"Found block with wrong recordlength: %d at %s\n", + block_info.rec_len, + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_read_cache(&sort_param->read_cache,(byte*) sort_param->rec_buff, + block_info.filepos, block_info.rec_len, + READING_NEXT)) + { + if (! searching) + _ma_check_print_info(param,"Couldn't read whole record from %s", + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_pack_rec_unpack(info,sort_param->record,sort_param->rec_buff, + block_info.rec_len)) + { + if (! searching) + _ma_check_print_info(param,"Found wrong record at %s", + llstr(sort_param->pos,llbuff)); + continue; + } + info->checksum=_ma_checksum(info,sort_param->record); + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos=block_info.filepos+ + block_info.rec_len); + info->packed_length=block_info.rec_len; + if (param->calc_checksum) + param->glob_crc+= info->checksum; + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); /* Impossible */ +} + + + /* Write record to new file */ + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) +{ + int flag; + uint length; + ulong block_length,reclength; + byte *from; + byte block_buff[8]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_sort_write_record"); + + if (sort_param->fix_datafile) + { + switch (sort_info->new_data_file_type) { + case STATIC_RECORD: + if (my_b_write(&info->rec_cache,sort_param->record, + share->base.pack_reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=share->base.pack_reclength; + info->s->state.split++; + /* sort_info->param->glob_crc+=_ma_static_checksum(info, sort_param->record); */ + break; + case DYNAMIC_RECORD: + if (! info->blobs) + from=sort_param->rec_buff; + else + { + /* must be sure that local buffer is big enough */ + reclength=info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,sort_param->record)+ + ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER; + if (sort_info->buff_length < reclength) + { + if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength, + MYF(MY_FREE_ON_ERROR | + MY_ALLOW_ZERO_PTR)))) + DBUG_RETURN(1); + sort_info->buff_length=reclength; + } + from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); + } + info->checksum=_ma_checksum(info,sort_param->record); + reclength= _ma_rec_pack(info,from,sort_param->record); + flag=0; + /* sort_info->param->glob_crc+=info->checksum; */ + + do + { + block_length=reclength+ 3 + test(reclength >= (65520-3)); + if (block_length < share->base.min_block_length) + block_length=share->base.min_block_length; + info->update|=HA_STATE_WRITE_AT_END; + block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE); + if (block_length > MARIA_MAX_BLOCK_LENGTH) + block_length=MARIA_MAX_BLOCK_LENGTH; + if (_ma_write_part_record(info,0L,block_length, + sort_param->filepos+block_length, + &from,&reclength,&flag)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=block_length; + info->s->state.split++; + } while (reclength); + /* sort_info->param->glob_crc+=info->checksum; */ + break; + case COMPRESSED_RECORD: + reclength=info->packed_length; + length= _ma_save_pack_length((uint) share->pack.version, block_buff, + reclength); + if (info->s->base.blobs) + length+= _ma_save_pack_length((uint) share->pack.version, + block_buff + length, info->blob_length); + if (my_b_write(&info->rec_cache,block_buff,length) || + my_b_write(&info->rec_cache,(byte*) sort_param->rec_buff,reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + /* sort_info->param->glob_crc+=info->checksum; */ + sort_param->filepos+=reclength+length; + info->s->state.split++; + break; + } + } + if (sort_param->master) + { + info->state->records++; + if ((param->testflag & T_WRITE_LOOP) && + (info->state->records % WRITE_COUNT) == 0) + { + char llbuff[22]; + printf("%s\r", llstr(info->state->records,llbuff)); + VOID(fflush(stdout)); + } + } + DBUG_RETURN(0); +} /* _ma_sort_write_record */ + + + /* Compare two keys from _ma_create_index_by_sort */ + +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b) +{ + uint not_used[2]; + return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b), + USE_WHOLE_KEY, SEARCH_SAME, not_used)); +} /* sort_key_cmp */ + + +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) +{ + uint diff_pos[2]; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; + int cmp; + + if (sort_info->key_block->inited) + { + cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, + diff_pos); + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg, + sort_param->notnull, + sort_info->key_block->lastkey, + (uchar*)a); + } + sort_param->unique[diff_pos[0]-1]++; + } + else + { + cmp= -1; + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, + (uchar*)a); + } + if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) + { + sort_info->dupp++; + sort_info->info->lastpos=get_record_for_key(sort_info->info, + sort_param->keyinfo, + (uchar*) a); + _ma_check_print_warning(param, + "Duplicate key for record at %10s against record at %10s", + llstr(sort_info->info->lastpos,llbuff), + llstr(get_record_for_key(sort_info->info, + sort_param->keyinfo, + sort_info->key_block-> + lastkey), + llbuff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (sort_info->param->testflag & T_VERBOSE) + _ma_print_key(stdout,sort_param->seg,(uchar*) a, USE_WHOLE_KEY); + return (sort_delete_record(sort_param)); + } +#ifndef DBUG_OFF + if (cmp > 0) + { + _ma_check_print_error(param, + "Internal error: Keys are not in order from sort"); + return(1); + } +#endif + return (sort_insert_key(sort_param,sort_info->key_block, + (uchar*) a, HA_OFFSET_ERROR)); +} /* sort_key_write */ + +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + SORT_KEY_BLOCKS *key_block=sort_info->key_block; + MARIA_SHARE *share=sort_info->info->s; + uint val_off, val_len; + int error; + SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf; + uchar *from, *to; + + val_len=share->ft2_keyinfo.keylength; + get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey); + to=maria_ft_buf->lastkey+val_off; + + if (maria_ft_buf->buf) + { + /* flushing first-level tree */ + error=sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + for (from=to+val_len; + !error && from < maria_ft_buf->buf; + from+= val_len) + { + memcpy(to, from, val_len); + error=sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + } + return error; + } + /* flushing second-level tree keyblocks */ + error=_ma_flush_pending_blocks(sort_param); + /* updating lastkey with second-level tree info */ + ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count); + _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); + /* restoring first level tree data in sort_info/sort_param */ + sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; + sort_param->keyinfo=share->keyinfo+sort_param->key; + share->state.key_root[sort_param->key]=HA_OFFSET_ERROR; + /* writing lastkey in first-level tree */ + return error ? error : + sort_insert_key(sort_param,sort_info->key_block, + maria_ft_buf->lastkey,HA_OFFSET_ERROR); +} + +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, const void *a) +{ + uint a_len, val_off, val_len, error; + uchar *p; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + SORT_FT_BUF *ft_buf= sort_info->ft_buf; + SORT_KEY_BLOCKS *key_block= sort_info->key_block; + + val_len=HA_FT_WLEN+sort_info->info->s->base.rec_reflength; + get_key_full_length_rdonly(a_len, (uchar *)a); + + if (!ft_buf) + { + /* + use two-level tree only if key_reflength fits in rec_reflength place + and row format is NOT static - for _ma_dpointer not to garble offsets + */ + if ((sort_info->info->s->base.key_reflength <= + sort_info->info->s->base.rec_reflength) && + (sort_info->info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length + + sizeof(SORT_FT_BUF), MYF(MY_WME)); + + if (!ft_buf) + { + sort_param->key_write=sort_key_write; + return sort_key_write(sort_param, a); + } + sort_info->ft_buf= ft_buf; + goto word_init_ft_buf; /* no need to duplicate the code */ + } + get_key_full_length_rdonly(val_off, ft_buf->lastkey); + + if (ha_compare_text(sort_param->seg->charset, + ((uchar *)a)+1,a_len-1, + ft_buf->lastkey+1,val_off-1, 0, 0)==0) + { + if (!ft_buf->buf) /* store in second-level tree */ + { + ft_buf->count++; + return sort_insert_key(sort_param,key_block, + ((uchar *)a)+a_len, HA_OFFSET_ERROR); + } + + /* storing the key in the buffer. */ + memcpy (ft_buf->buf, (char *)a+a_len, val_len); + ft_buf->buf+=val_len; + if (ft_buf->buf < ft_buf->end) + return 0; + + /* converting to two-level tree */ + p=ft_buf->lastkey+val_off; + + while (key_block->inited) + key_block++; + sort_info->key_block=key_block; + sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; + ft_buf->count=(ft_buf->buf - p)/val_len; + + /* flushing buffer to second-level tree */ + for (error=0; !error && p < ft_buf->buf; p+= val_len) + error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); + ft_buf->buf=0; + return error; + } + + /* flushing buffer */ + if ((error=_ma_sort_ft_buf_flush(sort_param))) + return error; + +word_init_ft_buf: + a_len+=val_len; + memcpy(ft_buf->lastkey, a, a_len); + ft_buf->buf=ft_buf->lastkey+a_len; + /* + 32 is just a safety margin here + (at least max(val_len, sizeof(nod_flag)) should be there). + May be better performance could be achieved if we'd put + (sort_info->keyinfo->block_length-32)/XXX + instead. + TODO: benchmark the best value for XXX. + */ + ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32); + return 0; +} /* sort_maria_ft_key_write */ + + + /* get pointer to record from a key */ + +static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key) +{ + return _ma_dpos(info,0,key + _ma_keylength(keyinfo,key)); +} /* get_record_for_key */ + + + /* Insert a key in sort-key-blocks */ + +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + register SORT_KEY_BLOCKS *key_block, uchar *key, + my_off_t prev_block) +{ + uint a_length,t_length,nod_flag; + my_off_t filepos,key_file_length; + uchar *anc_buff,*lastkey; + MARIA_KEY_PARAM s_temp; + MARIA_HA *info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_insert_key"); + + anc_buff=key_block->buff; + info=sort_info->info; + lastkey=key_block->lastkey; + nod_flag= (key_block == sort_info->key_block ? 0 : + info->s->base.key_reflength); + + if (!key_block->inited) + { + key_block->inited=1; + if (key_block == sort_info->key_block_end) + { + _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks"); + DBUG_RETURN(1); + } + a_length=2+nod_flag; + key_block->end_pos=anc_buff+2; + lastkey=0; /* No previous key in block */ + } + else + a_length=maria_getint(anc_buff); + + /* Save pointer to previous block */ + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,prev_block); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (uchar*) 0,lastkey,lastkey,key, + &s_temp); + (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + key_block->end_pos+=t_length; + if (a_length <= keyinfo->block_length) + { + VOID(_ma_move_key(keyinfo,key_block->lastkey,key)); + key_block->last_length=a_length-t_length; + DBUG_RETURN(0); + } + + /* Fill block with end-zero and write filled block */ + maria_putint(anc_buff,key_block->last_length,nod_flag); + bzero((byte*) anc_buff+key_block->last_length, + keyinfo->block_length- key_block->last_length); + key_file_length=info->state->key_file_length; + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back to it */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile,(byte*) anc_buff, + (uint) keyinfo->block_length,filepos, param->myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",(byte*) anc_buff,maria_getint(anc_buff)); + + /* Write separator-key to block in next level */ + if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + DBUG_RETURN(1); + + /* clear old block and write new key in it */ + key_block->inited=0; + DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block)); +} /* sort_insert_key */ + + + /* Delete record when we found a duplicated key */ + +static int sort_delete_record(MARIA_SORT_PARAM *sort_param) +{ + uint i; + int old_file,error; + uchar *key; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + DBUG_ENTER("sort_delete_record"); + + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch -q or with switch -qq"); + DBUG_RETURN(1); + } + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + { + _ma_check_print_error(param, + "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'mariachk --safe-recover' to fix it\n",stderr);; + DBUG_RETURN(1); + } + + old_file=info->dfile; + info->dfile=info->rec_cache.file; + if (sort_info->current_key) + { + key=info->lastkey+info->s->base.max_key_length; + if ((error=(*info->s->read_rnd)(info,sort_param->record,info->lastpos,0)) && + error != HA_ERR_RECORD_DELETED) + { + _ma_check_print_error(param,"Can't read record to be removed"); + info->dfile=old_file; + DBUG_RETURN(1); + } + + for (i=0 ; i < sort_info->current_key ; i++) + { + uint key_length= _ma_make_key(info,i,key,sort_param->record,info->lastpos); + if (_ma_ck_delete(info,i,key,key_length)) + { + _ma_check_print_error(param,"Can't delete key %d from record to be removed",i+1); + info->dfile=old_file; + DBUG_RETURN(1); + } + } + if (param->calc_checksum) + param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + } + error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info); + info->dfile=old_file; /* restore actual value */ + info->state->records--; + DBUG_RETURN(error); +} /* sort_delete_record */ + + /* Fix all pending blocks and flush everything to disk */ + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) +{ + uint nod_flag,length; + my_off_t filepos,key_file_length; + SORT_KEY_BLOCKS *key_block; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + myf myf_rw=sort_info->param->myf_rw; + MARIA_HA *info=sort_info->info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + DBUG_ENTER("_ma_flush_pending_blocks"); + + filepos= HA_OFFSET_ERROR; /* if empty file */ + nod_flag=0; + for (key_block=sort_info->key_block ; key_block->inited ; key_block++) + { + key_block->inited=0; + length=maria_getint(key_block->buff); + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,filepos); + key_file_length=info->state->key_file_length; + bzero((byte*) key_block->buff+length, keyinfo->block_length-length); + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, + DFLT_INIT_HITS, key_block->buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile,(byte*) key_block->buff, + (uint) keyinfo->block_length,filepos, myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",(byte*) key_block->buff,length); + nod_flag=1; + } + info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */ + DBUG_RETURN(0); +} /* _ma_flush_pending_blocks */ + + /* alloc space and pointers for key_blocks */ + +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length) +{ + reg1 uint i; + SORT_KEY_BLOCKS *block; + DBUG_ENTER("alloc_key_blocks"); + + if (!(block=(SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+ + buffer_length+IO_SIZE)*blocks, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for sort-key-blocks"); + return(0); + } + for (i=0 ; i < blocks ; i++) + { + block[i].inited=0; + block[i].buff=(uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i; + } + DBUG_RETURN(block); +} /* alloc_key_blocks */ + + + /* Check if file is almost full */ + +int maria_test_if_almost_full(MARIA_HA *info) +{ + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + return 0; + return (my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0))/10*9 > + (my_off_t) (info->s->base.max_key_file_length) || + my_seek(info->dfile,0L,MY_SEEK_END,MYF(0))/10*9 > + (my_off_t) info->s->base.max_data_file_length); +} + + /* Recreate table with bigger more alloced record-data */ + +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) +{ + int error; + MARIA_HA info; + MARIA_SHARE share; + MARIA_KEYDEF *keyinfo,*key,*key_end; + HA_KEYSEG *keysegs,*keyseg; + MARIA_COLUMNDEF *recdef,*rec,*end; + MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end; + MARIA_STATUS_INFO status_info; + uint unpack,key_parts; + ha_rows max_records; + ulonglong file_length,tmp_length; + MARIA_CREATE_INFO create_info; + + error=1; /* Default error */ + info= **org_info; + status_info= (*org_info)->state[0]; + info.state= &status_info; + share= *(*org_info)->s; + unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && + (param->testflag & T_UNPACK); + if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF)*share.base.keys))) + return 0; + memcpy((byte*) keyinfo,(byte*) share.keyinfo, + (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys)); + + key_parts= share.base.all_key_parts; + if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)* + (key_parts+share.base.keys)))) + { + my_afree((gptr) keyinfo); + return 1; + } + if (!(recdef=(MARIA_COLUMNDEF*) + my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)))) + { + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + return 1; + } + if (!(uniquedef=(MARIA_UNIQUEDEF*) + my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1)))) + { + my_afree((gptr) recdef); + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + return 1; + } + + /* Copy the column definitions */ + memcpy((byte*) recdef,(byte*) share.rec, + (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))); + for (rec=recdef,end=recdef+share.base.fields; rec != end ; rec++) + { + if (unpack && !(share.options & HA_OPTION_PACK_RECORD) && + rec->type != FIELD_BLOB && + rec->type != FIELD_VARCHAR && + rec->type != FIELD_CHECK) + rec->type=(int) FIELD_NORMAL; + } + + /* Change the new key to point at the saved key segments */ + memcpy((byte*) keysegs,(byte*) share.keyparts, + (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+ + share.state.header.uniques))); + keyseg=keysegs; + for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++) + { + key->seg=keyseg; + for (; keyseg->type ; keyseg++) + { + if (param->language) + keyseg->language=param->language; /* change language */ + } + keyseg++; /* Skip end pointer */ + } + + /* Copy the unique definitions and change them to point at the new key + segments*/ + memcpy((byte*) uniquedef,(byte*) share.uniqueinfo, + (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques))); + for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques; + u_ptr != u_end ; u_ptr++) + { + u_ptr->seg=keyseg; + keyseg+=u_ptr->keysegs+1; + } + if (share.options & HA_OPTION_COMPRESS_RECORD) + share.base.records=max_records=info.state->records; + else if (share.base.min_pack_length) + max_records=(ha_rows) (my_seek(info.dfile,0L,MY_SEEK_END,MYF(0)) / + (ulong) share.base.min_pack_length); + else + max_records=0; + unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && + (param->testflag & T_UNPACK); + share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD; + + file_length=(ulonglong) my_seek(info.dfile,0L,MY_SEEK_END,MYF(0)); + tmp_length= file_length+file_length/10; + set_if_bigger(file_length,param->max_data_file_length); + set_if_bigger(file_length,tmp_length); + set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length); + + VOID(maria_close(*org_info)); + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=max(max_records,share.base.records); + create_info.reloc_rows=share.base.reloc; + create_info.old_options=(share.options | + (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0)); + + create_info.data_file_length=file_length; + create_info.auto_increment=share.state.auto_increment; + create_info.language = (param->language ? param->language : + share.state.header.language); + create_info.key_file_length= status_info.key_file_length; + /* We don't have to handle symlinks here because we are using + HA_DONT_TOUCH_DATA */ + if (maria_create(filename, + share.base.keys - share.state.header.uniques, + keyinfo, share.base.fields, recdef, + share.state.header.uniques, uniquedef, + &create_info, + HA_DONT_TOUCH_DATA)) + { + _ma_check_print_error(param,"Got error %d when trying to recreate indexfile",my_errno); + goto end; + } + *org_info=maria_open(filename,O_RDWR, + (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED); + if (!*org_info) + { + _ma_check_print_error(param,"Got error %d when trying to open re-created indexfile", + my_errno); + goto end; + } + /* We are modifing */ + (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA; + VOID(_ma_readinfo(*org_info,F_WRLCK,0)); + (*org_info)->state->records=info.state->records; + if (share.state.create_time) + (*org_info)->s->state.create_time=share.state.create_time; + (*org_info)->s->state.unique=(*org_info)->this_unique= + share.state.unique; + (*org_info)->state->checksum=info.state->checksum; + (*org_info)->state->del=info.state->del; + (*org_info)->s->state.dellink=share.state.dellink; + (*org_info)->state->empty=info.state->empty; + (*org_info)->state->data_file_length=info.state->data_file_length; + if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT | + UPDATE_OPEN_COUNT)) + goto end; + error=0; +end: + my_afree((gptr) uniquedef); + my_afree((gptr) keyinfo); + my_afree((gptr) recdef); + my_afree((gptr) keysegs); + return error; +} + + + /* write suffix to data file if neaded */ + +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) +{ + MARIA_HA *info=sort_info->info; + + if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + { + char buff[MEMMAP_EXTRA_MARGIN]; + bzero(buff,sizeof(buff)); + if (my_b_write(&info->rec_cache,buff,sizeof(buff))) + { + _ma_check_print_error(sort_info->param, + "%d when writing to datafile",my_errno); + return 1; + } + sort_info->param->read_cache.end_of_file+=sizeof(buff); + } + return 0; +} + + /* Update state and mariachk_time of indexfile */ + +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) +{ + MARIA_SHARE *share=info->s; + + if (update & UPDATE_OPEN_COUNT) + { + share->state.open_count=0; + share->global_changed=0; + } + if (update & UPDATE_STAT) + { + uint i, key_parts= mi_uint2korr(share->state.header.key_parts); + share->state.rec_per_key_rows=info->state->records; + share->state.changed&= ~STATE_NOT_ANALYZED; + if (info->state->records) + { + for (i=0; istate.rec_per_key_part[i]=param->rec_per_key_part[i])) + share->state.changed|= STATE_NOT_ANALYZED; + } + } + } + if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC)) + { + if (update & UPDATE_TIME) + { + share->state.check_time= (long) time((time_t*) 0); + if (!share->state.create_time) + share->state.create_time=share->state.check_time; + } + /* + When tables are locked we haven't synched the share state and the + real state for a while so we better do it here before synching + the share state to disk. Only when table is write locked is it + necessary to perform this synch. + */ + if (info->lock_type == F_WRLCK) + share->state.state= *info->state; + if (_ma_state_info_write(share->kfile,&share->state,1+2)) + goto err; + share->changed=0; + } + { /* Force update of status */ + int error; + uint r_locks=share->r_locks,w_locks=share->w_locks; + share->r_locks= share->w_locks= share->tot_locks= 0; + error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK); + share->r_locks=r_locks; + share->w_locks=w_locks; + share->tot_locks=r_locks+w_locks; + if (!error) + return 0; + } +err: + _ma_check_print_error(param,"%d when updating keyfile",my_errno); + return 1; +} + + /* + Update auto increment value for a table + When setting the 'repair_only' flag we only want to change the + old auto_increment value if its wrong (smaller than some given key). + The reason is that we shouldn't change the auto_increment value + for a table without good reason when only doing a repair; If the + user have inserted and deleted rows, the auto_increment value + may be bigger than the biggest current row and this is ok. + + If repair_only is not set, we will update the flag to the value in + param->auto_increment is bigger than the biggest key. + */ + +void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, + my_bool repair_only) +{ + byte *record; + if (!info->s->base.auto_key || + ! maria_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1)) + { + if (!(param->testflag & T_VERY_SILENT)) + _ma_check_print_info(param, + "Table: %s doesn't have an auto increment key\n", + param->isam_file_name); + return; + } + if (!(param->testflag & T_SILENT) && + !(param->testflag & T_REP)) + printf("Updating MARIA file: %s\n", param->isam_file_name); + /* + We have to use an allocated buffer instead of info->rec_buff as + _ma_put_key_in_record() may use info->rec_buff + */ + if (!(record= (byte*) my_malloc((uint) info->s->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for extra record"); + return; + } + + maria_extra(info,HA_EXTRA_KEYREAD,0); + if (maria_rlast(info, record, info->s->base.auto_key-1)) + { + if (my_errno != HA_ERR_END_OF_FILE) + { + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + _ma_check_print_error(param,"%d when reading last record",my_errno); + return; + } + if (!repair_only) + info->s->state.auto_increment=param->auto_increment_value; + } + else + { + ulonglong auto_increment= (repair_only ? info->s->state.auto_increment : + param->auto_increment_value); + info->s->state.auto_increment=0; + _ma_update_auto_increment(info, record); + set_if_bigger(info->s->state.auto_increment,auto_increment); + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + maria_update_state_info(param, info, UPDATE_AUTO_INC); + return; +} + + +/* + Update statistics for each part of an index + + SYNOPSIS + maria_update_key_parts() + keyinfo IN Index information (only key->keysegs used) + rec_per_key_part OUT Store statistics here + unique IN Array of (#distinct tuples) + notnull_tuples IN Array of (#tuples), or NULL + records Number of records in the table + + DESCRIPTION + This function is called produce index statistics values from unique and + notnull_tuples arrays after these arrays were produced with sequential + index scan (the scan is done in two places: chk_index() and + sort_key_write()). + + This function handles all 3 index statistics collection methods. + + Unique is an array: + unique[0]= (#different values of {keypart1}) - 1 + unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 + ... + + For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: + notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) + notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all + keypart{i} are not NULL) + ... + For all other statistics collection methods notnull_tuples==NULL. + + Output is an array: + rec_per_key_part[k] = + = E(#records in the table such that keypart_1=c_1 AND ... AND + keypart_k=c_k for arbitrary constants c_1 ... c_k) + + = {assuming that values have uniform distribution and index contains all + tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from + index tuples} + + = #tuples-in-the-index / #distinct-tuples-in-the-index. + + The #tuples-in-the-index and #distinct-tuples-in-the-index have different + meaning depending on which statistics collection method is used: + + MI_STATS_METHOD_* how are nulls compared? which tuples are counted? + NULLS_EQUAL NULL == NULL all tuples in table + NULLS_NOT_EQUAL NULL != NULL all tuples in table + IGNORE_NULLS n/a tuples that don't have NULLs +*/ + +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records) +{ + ulonglong count=0,tmp, unique_tuples; + ulonglong tuples= records; + uint parts; + for (parts=0 ; parts < keyinfo->keysegs ; parts++) + { + count+=unique[parts]; + unique_tuples= count + 1; + if (notnull) + { + tuples= notnull[parts]; + /* + #(unique_tuples not counting tuples with NULLs) = + #(unique_tuples counting tuples with NULLs as different) - + #(tuples with NULLs) + */ + unique_tuples -= (records - notnull[parts]); + } + + if (unique_tuples == 0) + tmp= 1; + else if (count == 0) + tmp= tuples; /* 1 unique tuple */ + else + tmp= (tuples + unique_tuples/2) / unique_tuples; + + /* + for some weird keys (e.g. FULLTEXT) tmp can be <1 here. + let's ensure it is not + */ + set_if_bigger(tmp,1); + if (tmp >= (ulonglong) ~(ulong) 0) + tmp=(ulonglong) ~(ulong) 0; + + *rec_per_key_part=(ulong) tmp; + rec_per_key_part++; + } +} + + +static ha_checksum maria_byte_checksum(const byte *buf, uint length) +{ + ha_checksum crc; + const byte *end=buf+length; + for (crc=0; buf != end; buf++) + crc=((crc << 1) + *((uchar*) buf)) + + test(crc & (((ha_checksum) 1) << (8*sizeof(ha_checksum)-1))); + return crc; +} + +static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows) +{ + uint key_maxlength=key->maxlength; + if (key->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + key->seg->charset->mbmaxlen; + key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + } + return (key->flag & HA_SPATIAL) || + (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) && + ((ulonglong) rows * key_maxlength > + (ulonglong) maria_max_temp_length)); +} + +/* + Deactivate all not unique index that can be recreated fast + These include packed keys on which sorting will use more temporary + space than the max allowed file length or for which the unpacked keys + will take much more space than packed keys. + Note that 'rows' may be zero for the case when we don't know how many + rows we will put into the file. + */ + +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + DBUG_ASSERT(info->state->records == 0 && + (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES)); + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && + ! maria_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } +} + + +/* + Return TRUE if we can use repair by sorting + One can set the force argument to force to use sorting + even if the temporary file would be quite big! +*/ + +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, + ulonglong key_map, my_bool force) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + /* + maria_repair_by_sort only works if we have at least one key. If we don't + have any keys, we should use the normal repair. + */ + if (! maria_is_any_key_active(key_map)) + return FALSE; /* Can't use sort */ + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!force && maria_too_big_key_for_sort(key,rows)) + return FALSE; + } + return TRUE; +} + + +static void +set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) +{ + if ((sort_info->new_data_file_type=share->data_file_type) == + COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) + { + MARIA_SHARE tmp; + + if (share->options & HA_OPTION_PACK_RECORD) + sort_info->new_data_file_type = DYNAMIC_RECORD; + else + sort_info->new_data_file_type = STATIC_RECORD; + + /* Set delete_function for sort_delete_record() */ + memcpy((char*) &tmp, share, sizeof(*share)); + tmp.options= ~HA_OPTION_COMPRESS_RECORD; + _ma_setup_functions(&tmp); + share->delete_record=tmp.delete_record; + } +} diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c new file mode 100644 index 00000000000..054873706a4 --- /dev/null +++ b/storage/maria/ma_checksum.c @@ -0,0 +1,65 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Calculate a checksum for a row */ + +#include "maria_def.h" + +ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf) +{ + uint i; + ha_checksum crc=0; + MARIA_COLUMNDEF *rec=info->s->rec; + + for (i=info->s->base.fields ; i-- ; buf+=(rec++)->length) + { + const byte *pos; + ulong length; + switch (rec->type) { + case FIELD_BLOB: + { + length= _ma_calc_blob_length(rec->length- + maria_portable_sizeof_char_ptr, + buf); + memcpy((char*) &pos, buf+rec->length- maria_portable_sizeof_char_ptr, + sizeof(char*)); + break; + } + case FIELD_VARCHAR: + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1); + if (pack_length == 1) + length= (ulong) *(uchar*) buf; + else + length= uint2korr(buf); + pos= buf+pack_length; + break; + } + default: + length=rec->length; + pos=buf; + break; + } + crc=my_checksum(crc, pos ? pos : "", length); + } + return crc; +} + + +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *pos) +{ + return my_checksum(0, pos, info->s->base.reclength); +} diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c new file mode 100644 index 00000000000..f3a1b2ba261 --- /dev/null +++ b/storage/maria/ma_close.c @@ -0,0 +1,124 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* close a isam-database */ +/* + TODO: + We need to have a separate mutex on the closed file to allow other threads + to open other files during the time we flush the cache and close this file +*/ + +#include "maria_def.h" + +int maria_close(register MARIA_HA *info) +{ + int error=0,flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_close"); + DBUG_PRINT("enter",("base: %lx reopen: %u locks: %u", + info,(uint) share->reopen, (uint) share->tot_locks)); + + pthread_mutex_lock(&THR_LOCK_maria); + if (info->lock_type == F_EXTRA_LCK) + info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */ + + if (share->reopen == 1 && share->kfile >= 0) + _ma_decrement_open_count(info); + + if (info->lock_type != F_UNLCK) + { + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } + pthread_mutex_lock(&share->intern_lock); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + share->r_locks--; + share->tot_locks--; + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + error=my_errno; + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + } + flag= !--share->reopen; + maria_open_list=list_delete(maria_open_list,&info->open_list); + pthread_mutex_unlock(&share->intern_lock); + + my_free(_ma_get_rec_buff_ptr(info, info->rec_buff), MYF(MY_ALLOW_ZERO_PTR)); + if (flag) + { + if (share->kfile >= 0 && + flush_key_blocks(share->key_cache, share->kfile, + share->temporary ? FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + error=my_errno; + if (share->kfile >= 0) + { + /* + If we are crashed, we can safely flush the current state as it will + not change the crashed state. + We can NOT write the state in other cases as other threads + may be using the file at this point + */ + if (share->mode != O_RDONLY && maria_is_crashed(info)) + _ma_state_info_write(share->kfile, &share->state, 1); + if (my_close(share->kfile,MYF(0))) + error = my_errno; + } +#ifdef HAVE_MMAP + if (share->file_map) + _ma_unmap_file(info); +#endif + if (share->decode_trees) + { + my_free((gptr) share->decode_trees,MYF(0)); + my_free((gptr) share->decode_tables,MYF(0)); + } +#ifdef THREAD + thr_lock_delete(&share->lock); + VOID(pthread_mutex_destroy(&share->intern_lock)); + { + int i,keys; + keys = share->state.header.keys; + VOID(rwlock_destroy(&share->mmap_lock)); + for(i=0; ikey_root_lock[i])); + } + } +#endif + my_free((gptr) info->s,MYF(0)); + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (info->ftparser_param) + { + my_free((gptr)info->ftparser_param, MYF(0)); + info->ftparser_param= 0; + } + if (info->dfile >= 0 && my_close(info->dfile,MYF(0))) + error = my_errno; + + maria_log_command(MARIA_LOG_CLOSE,info,NULL,0,error); + my_free((gptr) info,MYF(0)); + + if (error) + { + DBUG_RETURN(my_errno=error); + } + DBUG_RETURN(0); +} /* maria_close */ diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c new file mode 100644 index 00000000000..b15b5b0ae02 --- /dev/null +++ b/storage/maria/ma_create.c @@ -0,0 +1,816 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Create a MARIA table */ + +#include "ma_ftdefs.h" +#include "ma_sp_defs.h" + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include +#else +#include /* Prototype for getpid */ +#endif +#endif +#include + + /* + ** Old options is used when recreating database, from isamchk + */ + +int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, + uint columns, MARIA_COLUMNDEF *recinfo, + uint uniques, MARIA_UNIQUEDEF *uniquedefs, + MARIA_CREATE_INFO *ci,uint flags) +{ + register uint i,j; + File dfile,file; + int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; + myf create_flag; + uint fields,length,max_key_length,packed,pointer,real_length_diff, + key_length,info_length,key_segs,options,min_key_length_skip, + base_pos,long_varchar_count,varchar_length, + max_key_block_length,unique_key_parts,fulltext_keys,offset; + ulong reclength, real_reclength,min_pack_length; + char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; + ulong pack_reclength; + ulonglong tot_length,max_rows, tmp; + enum en_fieldtype type; + MARIA_SHARE share; + MARIA_KEYDEF *keydef,tmp_keydef; + MARIA_UNIQUEDEF *uniquedef; + HA_KEYSEG *keyseg,tmp_keyseg; + MARIA_COLUMNDEF *rec; + ulong *rec_per_key_part; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; + MARIA_CREATE_INFO tmp_create_info; + DBUG_ENTER("maria_create"); + + if (!ci) + { + bzero((char*) &tmp_create_info,sizeof(tmp_create_info)); + ci=&tmp_create_info; + } + + if (keys + uniques > MARIA_MAX_KEY || columns == 0) + { + DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION); + } + LINT_INIT(dfile); + LINT_INIT(file); + errpos=0; + options=0; + bzero((byte*) &share,sizeof(share)); + + if (flags & HA_DONT_TOUCH_DATA) + { + if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD)) + options=ci->old_options & + (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD | + HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + else + options=ci->old_options & + (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + } + + if (ci->reloc_rows > ci->max_rows) + ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ + + if (!(rec_per_key_part= + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(my_errno); + + /* Start by checking fields and field-types used */ + + reclength=varchar_length=long_varchar_count=packed= + min_pack_length=pack_reclength=0; + for (rec=recinfo, fields=0 ; + fields != columns ; + rec++,fields++) + { + reclength+=rec->length; + if ((type=(enum en_fieldtype) rec->type) != FIELD_NORMAL && + type != FIELD_CHECK) + { + packed++; + if (type == FIELD_BLOB) + { + share.base.blobs++; + if (pack_reclength != INT_MAX32) + { + if (rec->length == 4+maria_portable_sizeof_char_ptr) + pack_reclength= INT_MAX32; + else + pack_reclength+=(1 << ((rec->length-maria_portable_sizeof_char_ptr)*8)); /* Max blob length */ + } + } + else if (type == FIELD_SKIP_PRESPACE || + type == FIELD_SKIP_ENDSPACE) + { + if (pack_reclength != INT_MAX32) + pack_reclength+= rec->length > 255 ? 2 : 1; + min_pack_length++; + } + else if (type == FIELD_VARCHAR) + { + varchar_length+= rec->length-1; /* Used for min_pack_length */ + packed--; + pack_reclength++; + min_pack_length++; + /* We must test for 257 as length includes pack-length */ + if (test(rec->length >= 257)) + { + long_varchar_count++; + pack_reclength+= 2; /* May be packed on 3 bytes */ + } + } + else if (type != FIELD_SKIP_ZERO) + { + min_pack_length+=rec->length; + packed--; /* Not a pack record type */ + } + } + else /* FIELD_NORMAL */ + min_pack_length+=rec->length; + } + if ((packed & 7) == 1) + { /* Bad packing, try to remove a zero-field */ + while (rec != recinfo) + { + rec--; + if (rec->type == (int) FIELD_SKIP_ZERO && rec->length == 1) + { + rec->type=(int) FIELD_NORMAL; + packed--; + min_pack_length++; + break; + } + } + } + + if (packed || (flags & HA_PACK_RECORD)) + options|=HA_OPTION_PACK_RECORD; /* Must use packed records */ + /* We can't use checksum with static length rows */ + if (!(options & HA_OPTION_PACK_RECORD)) + options&= ~HA_OPTION_CHECKSUM; + if (!(options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + min_pack_length+= varchar_length; + if (flags & HA_CREATE_TMP_TABLE) + { + options|= HA_OPTION_TMP_TABLE; + create_mode|= O_EXCL | O_NOFOLLOW; + } + if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM)) + { + options|= HA_OPTION_CHECKSUM; + min_pack_length++; + } + if (flags & HA_CREATE_DELAY_KEY_WRITE) + options|= HA_OPTION_DELAY_KEY_WRITE; + if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) + options|= HA_OPTION_RELIES_ON_SQL_LAYER; + + packed=(packed+7)/8; + if (pack_reclength != INT_MAX32) + pack_reclength+= reclength+packed + + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); + min_pack_length+=packed; + + if (!ci->data_file_length && ci->max_rows) + { + if (pack_reclength == INT_MAX32 || + (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength) + ci->data_file_length= ~(ulonglong) 0; + else + ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength; + } + else if (!ci->max_rows) + ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length + + ((options & HA_OPTION_PACK_RECORD) ? + 3 : 0))); + + if (options & (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD)) + pointer=maria_get_pointer_length(ci->data_file_length,maria_data_pointer_size); + else + pointer=maria_get_pointer_length(ci->max_rows,maria_data_pointer_size); + if (!(max_rows=(ulonglong) ci->max_rows)) + max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length); + + + real_reclength=reclength; + if (!(options & (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD))) + { + if (reclength <= pointer) + reclength=pointer+1; /* reserve place for delete link */ + } + else + reclength+= long_varchar_count; /* We need space for varchar! */ + + max_key_length=0; tot_length=0 ; key_segs=0; + fulltext_keys=0; + max_key_block_length=0; + share.state.rec_per_key_part=rec_per_key_part; + share.state.key_root=key_root; + share.state.key_del=key_del; + if (uniques) + { + max_key_block_length= maria_block_size; + max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer; + } + + for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++) + { + + share.state.key_root[i]= HA_OFFSET_ERROR; + min_key_length_skip=length=real_length_diff=0; + key_length=pointer; + if (keydef->flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + /* BAR TODO to support 3D and more dimensions in the future */ + uint sp_segs=SPDIMS*2; + keydef->flag=HA_SPATIAL; + + if (flags & HA_DONT_TOUCH_DATA) + { + /* + called by mariachk - i.e. table structure was taken from + MYI file and SPATIAL key *does have* additional sp_segs keysegs. + keydef->seg here points right at the GEOMETRY segment, + so we only need to decrease keydef->keysegs. + (see maria_recreate_table() in _ma_check.c) + */ + keydef->keysegs-=sp_segs-1; + } + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_BINARY && + keyseg->type != HA_KEYTYPE_VARBINARY1 && + keyseg->type != HA_KEYTYPE_VARBINARY2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + } + keydef->keysegs+=sp_segs; + key_length+=SPLEN*sp_segs; + length++; /* At least one length byte */ + min_key_length_skip+=SPLEN*2*SPDIMS; +#else + my_errno= HA_ERR_UNSUPPORTED; + goto err; +#endif /*HAVE_SPATIAL*/ + } + else if (keydef->flag & HA_FULLTEXT) + { + keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_TEXT && + keyseg->type != HA_KEYTYPE_VARTEXT1 && + keyseg->type != HA_KEYTYPE_VARTEXT2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + if (!(keyseg->flag & HA_BLOB_PART) && + (keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARTEXT2)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)? + 1 : 2); + } + } + + fulltext_keys++; + key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN; + length++; /* At least one length byte */ + min_key_length_skip+=HA_FT_MAXBYTELEN; + real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT; + } + else + { + /* Test if prefix compression */ + if (keydef->flag & HA_PACK_KEY) + { + /* Can't use space_compression on number keys */ + if ((keydef->seg[0].flag & HA_SPACE_PACK) && + keydef->seg[0].type == (int) HA_KEYTYPE_NUM) + keydef->seg[0].flag&= ~HA_SPACE_PACK; + + /* Only use HA_PACK_KEY when first segment is a variable length key */ + if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART | + HA_VAR_LENGTH_PART))) + { + /* pack relative to previous key */ + keydef->flag&= ~HA_PACK_KEY; + keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY; + } + else + { + keydef->seg[0].flag|=HA_PACK_KEY; /* for easyer intern test */ + keydef->flag|=HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + } + } + if (keydef->flag & HA_BINARY_PACK_KEY) + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment) + share.base.auto_key=i+1; + for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++) + { + /* numbers are stored with high by first to make compression easier */ + switch (keyseg->type) { + case HA_KEYTYPE_SHORT_INT: + case HA_KEYTYPE_LONG_INT: + case HA_KEYTYPE_FLOAT: + case HA_KEYTYPE_DOUBLE: + case HA_KEYTYPE_USHORT_INT: + case HA_KEYTYPE_ULONG_INT: + case HA_KEYTYPE_LONGLONG: + case HA_KEYTYPE_ULONGLONG: + case HA_KEYTYPE_INT24: + case HA_KEYTYPE_UINT24: + case HA_KEYTYPE_INT8: + keyseg->flag|= HA_SWAP_KEY; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; + default: + break; + } + if (keyseg->flag & HA_SPACE_PACK) + { + DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART)); + keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + length++; /* At least one length byte */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + DBUG_ASSERT(!test_all_bits(keyseg->flag, + (HA_VAR_LENGTH_PART | HA_BLOB_PART))); + keydef->flag|=HA_VAR_LENGTH_KEY; + length++; /* At least one length byte */ + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + key_length+= keyseg->length; + if (keyseg->null_bit) + { + key_length++; + options|=HA_OPTION_PACK_KEYS; + keyseg->flag|=HA_NULL_PART; + keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY; + } + } + } /* if HA_FULLTEXT */ + key_segs+=keydef->keysegs; + if (keydef->keysegs > HA_MAX_KEY_SEG) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + /* + key_segs may be 0 in the case when we only want to be able to + add on row into the table. This can happen with some DISTINCT queries + in MySQL + */ + if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME && + key_segs) + share.state.rec_per_key_part[key_segs-1]=1L; + length+=key_length; + keydef->block_length= MARIA_BLOCK_SIZE(length-real_length_diff, + pointer,MARIA_MAX_KEYPTR_SIZE); + if (keydef->block_length > MARIA_MAX_KEY_BLOCK_LENGTH || + length >= HA_MAX_KEY_BUFF) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + set_if_bigger(max_key_block_length,keydef->block_length); + keydef->keylength= (uint16) key_length; + keydef->minlength= (uint16) (length-min_key_length_skip); + keydef->maxlength= (uint16) length; + + if (length > max_key_length) + max_key_length= length; + tot_length+= (max_rows/(ulong) (((uint) keydef->block_length-5)/ + (length*2)))* + (ulong) keydef->block_length; + } + for (i=max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH ; i-- ; ) + key_del[i]=HA_OFFSET_ERROR; + + unique_key_parts=0; + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++) + { + uniquedef->key=keys+i; + unique_key_parts+=uniquedef->keysegs; + share.state.key_root[keys+i]= HA_OFFSET_ERROR; + tot_length+= (max_rows/(ulong) (((uint) maria_block_size-5)/ + ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))* + (ulong) maria_block_size; + } + keys+=uniques; /* Each unique has 1 key */ + key_segs+=uniques; /* Each unique has 1 key seg */ + + base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE + + max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH* + MARIA_STATE_KEYBLOCK_SIZE+ + key_segs*MARIA_STATE_KEYSEG_SIZE); + info_length=base_pos+(uint) (MARIA_BASE_INFO_SIZE+ + keys * MARIA_KEYDEF_SIZE+ + uniques * MARIA_UNIQUEDEF_SIZE + + (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ + columns*MARIA_COLUMNDEF_SIZE); + + bmove(share.state.header.file_version,(byte*) maria_file_magic,4); + ci->old_options=options| (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ? + HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD: 0); + mi_int2store(share.state.header.options,ci->old_options); + mi_int2store(share.state.header.header_length,info_length); + mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); + mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); + mi_int2store(share.state.header.base_pos,base_pos); + share.state.header.language= (ci->language ? + ci->language : default_charset_info->number); + share.state.header.max_block_size=max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH; + + share.state.dellink = HA_OFFSET_ERROR; + share.state.process= (ulong) getpid(); + share.state.unique= (ulong) 0; + share.state.update_count=(ulong) 0; + share.state.version= (ulong) time((time_t*) 0); + share.state.sortkey= (ushort) ~0; + share.state.auto_increment=ci->auto_increment; + share.options=options; + share.base.rec_reflength=pointer; + /* Get estimate for index file length (this may be wrong for FT keys) */ + tmp= (tot_length + max_key_block_length * keys * + MARIA_INDEX_BLOCK_MARGIN) / MARIA_MIN_KEY_BLOCK_LENGTH; + /* + use maximum of key_file_length we calculated and key_file_length value we + got from MYI file header (see also mariapack.c:save_state) + */ + share.base.key_reflength= + maria_get_pointer_length(max(ci->key_file_length,tmp),3); + share.base.keys= share.state.header.keys= keys; + share.state.header.uniques= uniques; + share.state.header.fulltext_keys= fulltext_keys; + mi_int2store(share.state.header.key_parts,key_segs); + mi_int2store(share.state.header.unique_key_parts,unique_key_parts); + + maria_set_all_keys_active(share.state.key_map, keys); + share.base.keystart = share.state.state.key_file_length= + MY_ALIGN(info_length, maria_block_size); + share.base.max_key_block_length=max_key_block_length; + share.base.max_key_length=ALIGN_SIZE(max_key_length+4); + share.base.records=ci->max_rows; + share.base.reloc= ci->reloc_rows; + share.base.reclength=real_reclength; + share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); + share.base.max_pack_length=pack_reclength; + share.base.min_pack_length=min_pack_length; + share.base.pack_bits=packed; + share.base.fields=fields; + share.base.pack_fields=packed; +#ifdef USE_RAID + share.base.raid_type=ci->raid_type; + share.base.raid_chunks=ci->raid_chunks; + share.base.raid_chunksize=ci->raid_chunksize; +#endif + + /* max_data_file_length and max_key_file_length are recalculated on open */ + if (options & HA_OPTION_TMP_TABLE) + share.base.max_data_file_length=(my_off_t) ci->data_file_length; + + share.base.min_block_length= + (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH && + ! share.base.blobs) ? + max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) : + MARIA_EXTEND_BLOCK_LENGTH; + if (! (flags & HA_DONT_TOUCH_DATA)) + share.state.create_time= (long) time((time_t*) 0); + + pthread_mutex_lock(&THR_LOCK_maria); + + if (ci->index_file_name) + { + char *iext= strrchr(ci->index_file_name, '.'); + int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT); + + fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME| (have_iext ? MY_REPLACE_EXT :MY_APPEND_EXT)); + fn_format(linkname, name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME|MY_APPEND_EXT); + linkname_ptr=linkname; + /* + Don't create the table if the link or file exists to ensure that one + doesn't accidently destroy another table. + */ + create_flag=0; + } + else + { + fn_format(filename, name, "", MARIA_NAME_IEXT, + (MY_UNPACK_FILENAME | + (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | + MY_APPEND_EXT); + linkname_ptr=0; + /* Replace the current file */ + create_flag=MY_DELETE_OLD; + } + + /* + If a MRG_MARIA table is in use, the mapped MARIA tables are open, + but no entry is made in the table cache for them. + A TRUNCATE command checks for the table in the cache only and could + be fooled to believe, the table is not open. + Pull the emergency brake in this situation. (Bug #8306) + */ + if (_ma_test_if_reopen(filename)) + { + my_printf_error(0, "MARIA table '%s' is in use " + "(most likely by a MERGE table). Try FLUSH TABLES.", + MYF(0), name + dirname_length(name)); + goto err; + } + + if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag))) < 0) + goto err; + errpos=1; + + if (!(flags & HA_DONT_TOUCH_DATA)) + { +#ifdef USE_RAID + if (share.base.raid_type) + { + (void) fn_format(filename, name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + if ((dfile=my_raid_create(filename, 0, create_mode, + share.base.raid_type, + share.base.raid_chunks, + share.base.raid_chunksize, + MYF(MY_WME | MY_RAID))) < 0) + goto err; + } + else +#endif + { + if (ci->data_file_name) + { + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); + fn_format(linkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=linkname; + create_flag=0; + } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=0; + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag))) < 0) + goto err; + } + errpos=3; + } + + if (_ma_state_info_write(file, &share.state, 2) || + _ma_base_info_write(file, &share.base)) + goto err; +#ifndef DBUG_OFF + if ((uint) my_tell(file,MYF(0)) != base_pos+ MARIA_BASE_INFO_SIZE) + { + uint pos=(uint) my_tell(file,MYF(0)); + DBUG_PRINT("warning",("base_length: %d != used_length: %d", + base_pos+ MARIA_BASE_INFO_SIZE, pos)); + } +#endif + + /* Write key and keyseg definitions */ + for (i=0 ; i < share.base.keys - uniques; i++) + { + uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0; + + if (_ma_keydef_write(file, &keydefs[i])) + goto err; + for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++) + if (_ma_keyseg_write(file, &keydefs[i].seg[j])) + goto err; +#ifdef HAVE_SPATIAL + for (j=0 ; j < sp_segs ; j++) + { + HA_KEYSEG sseg; + sseg.type=SPTYPE; + sseg.language= 7; /* Binary */ + sseg.null_bit=0; + sseg.bit_start=0; + sseg.bit_end=0; + sseg.bit_length= 0; + sseg.bit_pos= 0; + sseg.length=SPLEN; + sseg.null_pos=0; + sseg.start=j*SPLEN; + sseg.flag= HA_SWAP_KEY; + if (_ma_keyseg_write(file, &sseg)) + goto err; + } +#endif + } + /* Create extra keys for unique definitions */ + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + bzero((char*) &tmp_keydef,sizeof(tmp_keydef)); + bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg)); + for (i=0; i < uniques ; i++) + { + tmp_keydef.keysegs=1; + tmp_keydef.flag= HA_UNIQUE_CHECK; + tmp_keydef.block_length= (uint16)maria_block_size; + tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer; + tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength; + tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE; + tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH; + tmp_keyseg.start= offset; + offset+= MARIA_UNIQUE_HASH_LENGTH; + if (_ma_keydef_write(file,&tmp_keydef) || + _ma_keyseg_write(file,(&tmp_keyseg))) + goto err; + } + + /* Save unique definition */ + for (i=0 ; i < share.state.header.uniques ; i++) + { + HA_KEYSEG *keyseg_end; + keyseg= uniquedefs[i].seg; + if (_ma_uniquedef_write(file, &uniquedefs[i])) + goto err; + for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs; + keyseg < keyseg_end; + keyseg++) + { + switch (keyseg->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + keyseg->flag|= HA_VAR_LENGTH_PART; + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; + default: + break; + } + if (_ma_keyseg_write(file, keyseg)) + goto err; + } + } + for (i=0 ; i < share.base.fields ; i++) + if (_ma_recinfo_write(file, &recinfo[i])) + goto err; + +#ifndef DBUG_OFF + if ((uint) my_tell(file,MYF(0)) != info_length) + { + uint pos= (uint) my_tell(file,MYF(0)); + DBUG_PRINT("warning",("info_length: %d != used_length: %d", + info_length, pos)); + } +#endif + + /* Enlarge files */ + if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) + goto err; + + if (! (flags & HA_DONT_TOUCH_DATA)) + { +#ifdef USE_RELOC + if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) + goto err; +#endif + errpos=2; + if (my_close(dfile,MYF(0))) + goto err; + } + errpos=0; + pthread_mutex_unlock(&THR_LOCK_maria); + if (my_close(file,MYF(0))) + goto err; + my_free((char*) rec_per_key_part,MYF(0)); + DBUG_RETURN(0); + +err: + pthread_mutex_unlock(&THR_LOCK_maria); + save_errno=my_errno; + switch (errpos) { + case 3: + VOID(my_close(dfile,MYF(0))); + /* fall through */ + case 2: + /* QQ: Tõnu should add a call to my_raid_delete() here */ + if (! (flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + MYF(0)); + /* fall through */ + case 1: + VOID(my_close(file,MYF(0))); + if (! (flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + MYF(0)); + } + my_free((char*) rec_per_key_part, MYF(0)); + DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */ +} + + +uint maria_get_pointer_length(ulonglong file_length, uint def) +{ + DBUG_ASSERT(def >= 2 && def <= 7); + if (file_length) /* If not default */ + { +#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS + if (file_length >= (longlong) 1 << 56) + def=8; +#endif + if (file_length >= (longlong) 1 << 48) + def=7; + if (file_length >= (longlong) 1 << 40) + def=6; + else if (file_length >= (longlong) 1 << 32) + def=5; + else if (file_length >= (1L << 24)) + def=4; + else if (file_length >= (1L << 16)) + def=3; + else + def=2; + } + return def; +} diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c new file mode 100644 index 00000000000..7f2bff85047 --- /dev/null +++ b/storage/maria/ma_dbug.c @@ -0,0 +1,193 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Support rutiner with are using with dbug */ + +#include "maria_def.h" + + /* Print a key in user understandable format */ + +void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg, + const uchar *key, uint length) +{ + int flag; + short int s_1; + long int l_1; + float f_1; + double d_1; + const uchar *end; + const uchar *key_end=key+length; + + VOID(fputs("Key: \"",stream)); + flag=0; + for (; keyseg->type && key < key_end ;keyseg++) + { + if (flag++) + VOID(putc('-',stream)); + end= key+ keyseg->length; + if (keyseg->flag & HA_NULL_PART) + { + /* A NULL value is encoded by a 1-byte flag. Zero means NULL. */ + if (! *(key++)) + { + fprintf(stream,"NULL"); + continue; + } + } + + switch (keyseg->type) { + case HA_KEYTYPE_BINARY: + if (!(keyseg->flag & HA_SPACE_PACK) && keyseg->length == 1) + { /* packed binary digit */ + VOID(fprintf(stream,"%d",(uint) *key++)); + break; + } + /* fall through */ + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_NUM: + if (keyseg->flag & HA_SPACE_PACK) + { + VOID(fprintf(stream,"%.*s",(int) *key,key+1)); + key+= (int) *key+1; + } + else + { + VOID(fprintf(stream,"%.*s",(int) keyseg->length,key)); + key=end; + } + break; + case HA_KEYTYPE_INT8: + VOID(fprintf(stream,"%d",(int) *((signed char*) key))); + key=end; + break; + case HA_KEYTYPE_SHORT_INT: + s_1= mi_sint2korr(key); + VOID(fprintf(stream,"%d",(int) s_1)); + key=end; + break; + case HA_KEYTYPE_USHORT_INT: + { + ushort u_1; + u_1= mi_uint2korr(key); + VOID(fprintf(stream,"%u",(uint) u_1)); + key=end; + break; + } + case HA_KEYTYPE_LONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%ld",l_1)); + key=end; + break; + case HA_KEYTYPE_ULONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%lu",(ulong) l_1)); + key=end; + break; + case HA_KEYTYPE_INT24: + VOID(fprintf(stream,"%ld",(long) mi_sint3korr(key))); + key=end; + break; + case HA_KEYTYPE_UINT24: + VOID(fprintf(stream,"%lu",(ulong) mi_uint3korr(key))); + key=end; + break; + case HA_KEYTYPE_FLOAT: + mi_float4get(f_1,key); + VOID(fprintf(stream,"%g",(double) f_1)); + key=end; + break; + case HA_KEYTYPE_DOUBLE: + mi_float8get(d_1,key); + VOID(fprintf(stream,"%g",d_1)); + key=end; + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,-10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_ULONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_BIT: + { + uint i; + fputs("0x",stream); + for (i=0 ; i < keyseg->length ; i++) + fprintf(stream, "%02x", (uint) *key++); + key= end; + break; + } + +#endif + case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */ + case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */ + { + uint tmp_length; + get_key_length(tmp_length,key); + /* + The following command sometimes gives a warning from valgrind. + Not yet sure if the bug is in valgrind, glibc or mysqld + */ + VOID(fprintf(stream,"%.*s",(int) tmp_length,key)); + key+=tmp_length; + break; + } + default: break; /* This never happens */ + } + } + VOID(fputs("\"\n",stream)); + return; +} /* print_key */ + + +#ifdef EXTRA_DEBUG + +my_bool _ma_check_table_is_closed(const char *name, const char *where) +{ + char filename[FN_REFLEN]; + LIST *pos; + DBUG_ENTER("_ma_check_table_is_closed"); + + (void) fn_format(filename,name,"",MARIA_NAME_IEXT,4+16+32); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename)) + { + if (share->last_version) + { + fprintf(stderr,"Warning: Table: %s is open on %s\n", name,where); + DBUG_PRINT("warning",("Table: %s is open on %s", name,where)); + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} +#endif /* EXTRA_DEBUG */ diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c new file mode 100644 index 00000000000..9e06b633171 --- /dev/null +++ b/storage/maria/ma_delete.c @@ -0,0 +1,890 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove a row from a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag, + uchar *key,uint key_length,my_off_t page,uchar *anc_buff); +static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key,uchar *anc_buff, + my_off_t leaf_page,uchar *leaf_buff,uchar *keypos, + my_off_t next_block,uchar *ret_key); +static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *anc_buff, + my_off_t leaf_page,uchar *leaf_buff,uchar *keypos); +static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,uchar *keypos, + uchar *lastkey,uchar *page_end, + my_off_t *next_block); +static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root); + + +int maria_delete(MARIA_HA *info,const byte *record) +{ + uint i; + uchar *old_key; + int save_errno; + char lastpos[8]; + + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_delete"); + + /* Test if record is in datafile */ + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + DBUG_EXECUTE_IF("my_error_test_undefined_error", + maria_print_error(info->s, INT_MAX); + DBUG_RETURN(my_errno= INT_MAX);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */ + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + if (info->s->calc_checksum) + info->checksum=(*info->s->calc_checksum)(info,record); + if ((*share->compare_record)(info,record)) + goto err; /* Error on read-check */ + + if (_ma_mark_file_changed(info)) + goto err; + + /* Remove all keys from the .ISAM file */ + + old_key=info->lastkey2; + for (i=0 ; i < share->base.keys ; i++ ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + info->s->keyinfo[i].version++; + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_del(info,i,(char*) old_key,record,info->lastpos)) + goto err; + } + else + { + if (info->s->keyinfo[i].ck_delete(info,i,old_key, + _ma_make_key(info,i,old_key,record,info->lastpos))) + goto err; + } + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + } + } + + if ((*share->delete_record)(info)) + goto err; /* Remove record from database */ + info->state->checksum-=info->checksum; + + info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED; + info->state->records--; + + mi_sizestore(lastpos,info->lastpos); + maria_log_command(MARIA_LOG_DELETE,info,(byte*) lastpos,sizeof(lastpos),0); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (delete)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + save_errno=my_errno; + mi_sizestore(lastpos,info->lastpos); + maria_log_command(MARIA_LOG_DELETE,info,(byte*) lastpos, sizeof(lastpos),0); + if (save_errno != HA_ERR_RECORD_CHANGED) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* mark table crashed */ + } + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + allow_break(); /* Allow SIGHUP & SIGINT */ + my_errno=save_errno; + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + } + + DBUG_RETURN(my_errno); +} /* maria_delete */ + + + /* Remove a key from the btree index */ + +int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key, + uint key_length) +{ + return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length, + &info->s->state.key_root[keynr]); +} /* _ma_ck_delete */ + + +static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root) +{ + int error; + uint nod_flag; + my_off_t old_root; + uchar *root_buff; + DBUG_ENTER("_ma_ck_real_delete"); + + if ((old_root=*root) == HA_OFFSET_ERROR) + { + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno=HA_ERR_CRASHED); + } + if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + DBUG_RETURN(my_errno=ENOMEM); + } + DBUG_PRINT("info",("root_page: %ld",old_root)); + if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0)) + { + error= -1; + goto err; + } + if ((error=d_search(info,keyinfo, + (keyinfo->flag & HA_FULLTEXT ? SEARCH_FIND | SEARCH_UPDATE + : SEARCH_SAME), + key,key_length,old_root,root_buff)) >0) + { + if (error == 2) + { + DBUG_PRINT("test",("Enlarging of root when deleting")); + error= _ma_enlarge_root(info,keyinfo,key,root); + } + else /* error == 1 */ + { + if (maria_getint(root_buff) <= (nod_flag=_ma_test_if_nod(root_buff))+3) + { + error=0; + if (nod_flag) + *root= _ma_kpos(nod_flag,root_buff+2+nod_flag); + else + *root=HA_OFFSET_ERROR; + if (_ma_dispose(info,keyinfo,old_root,DFLT_INIT_HITS)) + error= -1; + } + else + error= _ma_write_keypage(info,keyinfo,old_root, + DFLT_INIT_HITS,root_buff); + } + } +err: + my_afree((gptr) root_buff); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} /* _ma_ck_real_delete */ + + + /* + ** Remove key below key root + ** Return values: + ** 1 if there are less buffers; In this case anc_buff is not saved + ** 2 if there are more buffers + ** -1 on errors + */ + +static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, uchar *key, uint key_length, + my_off_t page, uchar *anc_buff) +{ + int flag,ret_value,save_flag; + uint length,nod_flag,search_key_length; + my_bool last_key; + uchar *leaf_buff,*keypos; + my_off_t leaf_page,next_block; + uchar lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("d_search"); + DBUG_DUMP("page",(byte*) anc_buff,maria_getint(anc_buff)); + + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length, + comp_flag, &keypos, lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + { + DBUG_PRINT("error",("Found wrong key")); + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(anc_buff); + + if (!flag && keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, lastkey); + subkeys=ft_sintXkorr(lastkey+off); + DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + if (info->ft1_to_ft2) + { + /* we're in ft1->ft2 conversion mode. Saving key data */ + insert_dynamic(info->ft1_to_ft2, (char*) (lastkey+off)); + } + else + { + /* we need exact match only if not in ft1->ft2 conversion mode */ + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, + comp_flag, &keypos, lastkey, &last_key); + } + /* fall through to normal delete */ + } + else + { + /* popular word. two-level tree. going down */ + uint tmp_key_length; + my_off_t root; + uchar *kpos=keypos; + + if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + root= _ma_dpos(info,nod_flag,kpos); + if (subkeys == -1) + { + /* the last entry in sub-tree */ + if (_ma_dispose(info, keyinfo, root,DFLT_INIT_HITS)) + DBUG_RETURN(-1); + /* fall through to normal delete */ + } + else + { + keyinfo=&info->s->ft2_keyinfo; + kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + get_key_full_length_rdonly(off, key); + key+=off; + ret_value= _ma_ck_real_delete(info, &info->s->ft2_keyinfo, + key, HA_FT_WLEN, &root); + _ma_dpointer(info, kpos+HA_FT_WLEN, root); + subkeys++; + ft_intXstore(kpos, subkeys); + if (!ret_value) + ret_value= _ma_write_keypage(info,keyinfo,page, + DFLT_INIT_HITS,anc_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + } + } + } + leaf_buff=0; + LINT_INIT(leaf_page); + if (nod_flag) + { + leaf_page= _ma_kpos(nod_flag,keypos); + if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + my_errno=ENOMEM; + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0)) + goto err; + } + + if (flag != 0) + { + if (!nod_flag) + { + DBUG_PRINT("error",("Didn't find key")); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; /* This should newer happend */ + goto err; + } + save_flag=0; + ret_value=d_search(info,keyinfo,comp_flag,key,key_length, + leaf_page,leaf_buff); + } + else + { /* Found key */ + uint tmp; + length=maria_getint(anc_buff); + if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, + &next_block))) + goto err; + + length-= tmp; + + maria_putint(anc_buff,length,nod_flag); + if (!nod_flag) + { /* On leaf page */ + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff)) + { + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + /* Page will be update later if we return 1 */ + DBUG_RETURN(test(length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + save_flag=1; + ret_value=del(info,keyinfo,key,anc_buff,leaf_page,leaf_buff,keypos, + next_block,lastkey); + } + if (ret_value >0) + { + save_flag=1; + if (ret_value == 1) + ret_value= underflow(info,keyinfo,anc_buff,leaf_page,leaf_buff,keypos); + else + { /* This happens only with packed keys */ + DBUG_PRINT("test",("Enlarging of key when deleting")); + if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey, + (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0); + } + } + if (ret_value == 0 && maria_getint(anc_buff) > keyinfo->block_length) + { + save_flag=1; + ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2; + } + if (save_flag && ret_value != 1) + ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff); + else + { + DBUG_DUMP("page",(byte*) anc_buff,maria_getint(anc_buff)); + } + my_afree((byte*) leaf_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + +err: + my_afree((byte*) leaf_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* d_search */ + + + /* Remove a key that has a page-reference */ + +static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *key, + uchar *anc_buff, my_off_t leaf_page, uchar *leaf_buff, + uchar *keypos, /* Pos to where deleted key was */ + my_off_t next_block, + uchar *ret_key) /* key before keypos in anc_buff */ +{ + int ret_value,length; + uint a_length,nod_flag,tmp; + my_off_t next_page; + uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + MARIA_SHARE *share=info->s; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("del"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", leaf_page, + (ulong) keypos)); + DBUG_DUMP("leaf_buff",(byte*) leaf_buff,maria_getint(leaf_buff)); + + endpos=leaf_buff+maria_getint(leaf_buff); + if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp))) + DBUG_RETURN(-1); + + if ((nod_flag=_ma_test_if_nod(leaf_buff))) + { + next_page= _ma_kpos(nod_flag,endpos); + if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) + ret_value= -1; + else + { + DBUG_DUMP("next_page",(byte*) next_buff,maria_getint(next_buff)); + if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff, + keypos,next_block,ret_key)) >0) + { + endpos=leaf_buff+maria_getint(leaf_buff); + if (ret_value == 1) + { + ret_value=underflow(info,keyinfo,leaf_buff,next_page, + next_buff,endpos); + if (ret_value == 0 && maria_getint(leaf_buff) > keyinfo->block_length) + { + ret_value= _ma_split_page(info,keyinfo,key,leaf_buff,ret_key,0) | 2; + } + } + else + { + DBUG_PRINT("test",("Inserting of key when deleting")); + if (_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff, + (uchar*) 0,(uchar*) 0,(my_off_t) 0,0); + } + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + } + my_afree((byte*) next_buff); + DBUG_RETURN(ret_value); + } + + /* Remove last key from leaf page */ + + maria_putint(leaf_buff,key_start-leaf_buff,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + + /* Place last key in ancestor page on deleted key position */ + + a_length=maria_getint(anc_buff); + endpos=anc_buff+a_length; + if (keypos != anc_buff+2+share->base.key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,ret_key,keypos,&tmp)) + goto err; + prev_key=(keypos == anc_buff+2+share->base.key_reflength ? + 0 : ret_key); + length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength, + keypos == endpos ? (uchar*) 0 : keypos, + prev_key, prev_key, + keybuff,&s_temp); + if (length > 0) + bmove_upp((byte*) endpos+length,(byte*) endpos,(uint) (endpos-keypos)); + else + bmove(keypos,keypos-length, (int) (endpos-keypos)+length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + /* Save pointer to next leaf */ + if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key)) + goto err; + _ma_kpointer(info,keypos - share->base.key_reflength,next_block); + maria_putint(anc_buff,a_length+length,share->base.key_reflength); + + DBUG_RETURN( maria_getint(leaf_buff) <= + (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length)); +err: + DBUG_RETURN(-1); +} /* del */ + + + /* Balances adjacent pages if underflow occours */ + +static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uchar *anc_buff, + my_off_t leaf_page,/* Ancestor page and underflow page */ + uchar *leaf_buff, + uchar *keypos) /* Position to pos after key */ +{ + int t_length; + uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, + key_reflength,key_length; + my_off_t next_page; + uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF], + *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key, + *after_key; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("underflow"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page, + (ulong) keypos)); + DBUG_DUMP("anc_buff",(byte*) anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("leaf_buff",(byte*) leaf_buff,maria_getint(leaf_buff)); + + buff=info->buff; + info->buff_used=1; + next_keypos=keypos; + nod_flag=_ma_test_if_nod(leaf_buff); + p_length=nod_flag+2; + anc_length=maria_getint(anc_buff); + leaf_length=maria_getint(leaf_buff); + key_reflength=share->base.key_reflength; + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; + + if ((keypos < anc_buff+anc_length && (info->state->records & 1)) || + keypos == anc_buff+2+key_reflength) + { /* Use page right of anc-page */ + DBUG_PRINT("test",("use right page")); + + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + if (!(next_keypos= _ma_get_key(info, keyinfo, + anc_buff, buff, keypos, &length))) + goto err; + } + else + { + /* Got to end of found key */ + buff[0]=buff[1]=0; /* Avoid length error check if packed key */ + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + buff)) + goto err; + } + next_page= _ma_kpos(key_reflength,next_keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length=maria_getint(buff); + DBUG_DUMP("next",(byte*) buff,buff_length); + + /* find keys to make a big key-page */ + bmove((byte*) next_keypos-key_reflength,(byte*) buff+2, + key_reflength); + if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length) + || !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key, + leaf_buff+leaf_length,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length, + prev_key, prev_key, + anc_key, &s_temp); + length=buff_length-p_length; + endpos=buff+length+leaf_length+t_length; + /* buff will always be larger than before !*/ + bmove_upp((byte*) endpos, (byte*) buff+buff_length,length); + memcpy((byte*) buff, (byte*) leaf_buff,(size_t) leaf_length); + (*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp); + buff_length=(uint) (endpos-buff); + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + + if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + memcpy((byte*) leaf_buff,(byte*) buff,(size_t) buff_length); + if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + endpos=anc_buff+anc_length; + DBUG_PRINT("test",("anc_buff: %lx endpos: %lx",anc_buff,endpos)); + if (keypos != anc_buff+2+key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length)) + goto err; + if (!(half_pos= _ma_find_half_pos(nod_flag, keyinfo, buff, leaf_key, + &key_length, &after_key))) + goto err; + length=(uint) (half_pos-buff); + memcpy((byte*) leaf_buff,(byte*) buff,(size_t) length); + maria_putint(leaf_buff,length,nod_flag); + + /* Correct new keypointer to leaf_page */ + half_pos=after_key; + _ma_kpointer(info,leaf_key+key_length,next_page); + /* Save key in anc_buff */ + prev_key=(keypos == anc_buff+2+key_reflength ? (uchar*) 0 : anc_key), + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + (keypos == endpos ? (uchar*) 0 : + keypos), + prev_key, prev_key, + leaf_key, &s_temp); + if (t_length >= 0) + bmove_upp((byte*) endpos+t_length,(byte*) endpos, + (uint) (endpos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store key first in new page */ + if (nod_flag) + bmove((byte*) buff+2,(byte*) half_pos-nod_flag,(size_t) nod_flag); + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)) + goto err; + t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (uchar*) 0, + (uchar*) 0, (uchar *) 0, + leaf_key, &s_temp); + /* t_length will always be > 0 for a new page !*/ + length=(uint) ((buff+maria_getint(buff))-half_pos); + bmove((byte*) buff+p_length+t_length,(byte*) half_pos,(size_t) length); + (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp); + maria_putint(buff,length+t_length+p_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + DBUG_RETURN(anc_length <= ((info->quick_mode ? MARIA_MIN_BLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + + DBUG_PRINT("test",("use left page")); + + keypos= _ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length); + if (!keypos) + goto err; + next_page= _ma_kpos(key_reflength,keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length=maria_getint(buff); + endpos=buff+buff_length; + DBUG_DUMP("prev",(byte*) buff,buff_length); + + /* find keys to make a big key-page */ + bmove((byte*) next_keypos - key_reflength,(byte*) leaf_buff+2, + key_reflength); + next_keypos=keypos; + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + anc_key)) + goto err; + if (!_ma_get_last_key(info,keyinfo,buff,leaf_key,endpos,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (leaf_length == p_length ? + (uchar*) 0 : leaf_buff+p_length), + prev_key, prev_key, + anc_key, &s_temp); + if (t_length >= 0) + bmove((byte*) endpos+t_length,(byte*) leaf_buff+p_length, + (size_t) (leaf_length-p_length)); + else /* We gained space */ + bmove((byte*) endpos,(byte*) leaf_buff+((int) p_length-t_length), + (size_t) (leaf_length-p_length+t_length)); + + (*keyinfo->store_key)(keyinfo,endpos,&s_temp); + buff_length=buff_length+leaf_length-p_length+t_length; + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + if (_ma_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + if (keypos == anc_buff+2+key_reflength) + anc_pos=0; /* First key */ + else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos, + &length)) + goto err; + endpos= _ma_find_half_pos(nod_flag,keyinfo,buff,leaf_key, + &key_length, &half_pos); + if (!endpos) + goto err; + _ma_kpointer(info,leaf_key+key_length,leaf_page); + /* Save key in anc_buff */ + DBUG_DUMP("anc_buff",(byte*) anc_buff,anc_length); + DBUG_DUMP("key_to_anc",(byte*) leaf_key,key_length); + + temp_pos=anc_buff+anc_length; + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + keypos == temp_pos ? (uchar*) 0 + : keypos, + anc_pos, anc_pos, + leaf_key,&s_temp); + if (t_length > 0) + bmove_upp((byte*) temp_pos+t_length,(byte*) temp_pos, + (uint) (temp_pos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store first key on new page */ + if (nod_flag) + bmove((byte*) leaf_buff+2,(byte*) half_pos-nod_flag,(size_t) nod_flag); + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))) + goto err; + DBUG_DUMP("key_to_leaf",(byte*) leaf_key,length); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0, + (uchar*) 0, (uchar*) 0, leaf_key, &s_temp); + length=(uint) ((buff+buff_length)-half_pos); + DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length)); + bmove((byte*) leaf_buff+p_length+t_length,(byte*) half_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp); + maria_putint(leaf_buff,length+t_length+p_length,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + maria_putint(buff,endpos-buff,nod_flag); + } + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2); + +err: + DBUG_RETURN(-1); +} /* underflow */ + + + /* + remove a key from packed buffert + The current code doesn't handle the case that the next key may be + packed better against the previous key if there is a case difference + returns how many chars was removed or 0 on error + */ + +static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar *keypos, /* Where key starts */ + uchar *lastkey, /* key to be removed */ + uchar *page_end, /* End of page */ + my_off_t *next_block) /* ptr to next block */ +{ + int s_length; + uchar *start; + DBUG_ENTER("remove_key"); + DBUG_PRINT("enter",("keypos: %lx page_end: %lx",keypos,page_end)); + + start=keypos; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + s_length=(int) (keyinfo->keylength+nod_flag); + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos+s_length); + } + else + { /* Let keypos point at next key */ + /* Calculate length of key */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + DBUG_RETURN(0); /* Error */ + + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos); + s_length=(int) (keypos-start); + if (keypos != page_end) + { + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + uchar *old_key=start; + uint next_length,prev_length,prev_pack_length; + get_key_length(next_length,keypos); + get_key_pack_length(prev_length,prev_pack_length,old_key); + if (next_length > prev_length) + { + /* We have to copy data from the current key to the next key */ + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + (next_length-prev_length)); + keypos-=(next_length-prev_length)+prev_pack_length; + store_key_length(keypos,prev_length); + s_length=(int) (keypos-start); + } + } + else + { + /* Check if a variable length first key part */ + if ((keyinfo->seg->flag & HA_PACK_KEY) && *keypos & 128) + { + /* Next key is packed against the current one */ + uint next_length,prev_length,prev_pack_length,lastkey_length, + rest_length; + if (keyinfo->seg[0].length >= 127) + { + if (!(prev_length=mi_uint2korr(start) & 32767)) + goto end; + next_length=mi_uint2korr(keypos) & 32767; + keypos+=2; + prev_pack_length=2; + } + else + { + if (!(prev_length= *start & 127)) + goto end; /* Same key as previous*/ + next_length= *keypos & 127; + keypos++; + prev_pack_length=1; + } + if (!(*start & 128)) + prev_length=0; /* prev key not packed */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + lastkey++; /* Skip null marker */ + get_key_length(lastkey_length,lastkey); + if (!next_length) /* Same key after */ + { + next_length=lastkey_length; + rest_length=0; + } + else + get_key_length(rest_length,keypos); + + if (next_length >= prev_length) + { /* Key after is based on deleted key */ + uint pack_length,tmp; + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + tmp=(next_length-prev_length)); + rest_length+=tmp; + pack_length= prev_length ? get_pack_length(rest_length): 0; + keypos-=tmp+pack_length+prev_pack_length; + s_length=(int) (keypos-start); + if (prev_length) /* Pack against prev key */ + { + *keypos++= start[0]; + if (prev_pack_length == 2) + *keypos++= start[1]; + store_key_length(keypos,rest_length); + } + else + { + /* Next key is not packed anymore */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + { + rest_length++; /* Mark not null */ + } + if (prev_pack_length == 2) + { + mi_int2store(keypos,rest_length); + } + else + *keypos= rest_length; + } + } + } + } + } + } + end: + bmove((byte*) start,(byte*) start+s_length, + (uint) (page_end-start-s_length)); + DBUG_RETURN((uint) s_length); +} /* remove_key */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c new file mode 100644 index 00000000000..d71e4d7dce7 --- /dev/null +++ b/storage/maria/ma_delete_all.c @@ -0,0 +1,79 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove all rows from a MARIA table */ +/* This clears the status information and truncates files */ + +#include "maria_def.h" + +int maria_delete_all_rows(MARIA_HA *info) +{ + uint i; + char buf[22]; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO *state=&share->state; + DBUG_ENTER("maria_delete_all_rows"); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + if (_ma_mark_file_changed(info)) + goto err; + + info->state->records=info->state->del=state->split=0; + state->dellink = HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length=share->base.keystart; + info->state->data_file_length=0; + info->state->empty=info->state->key_empty=0; + info->state->checksum=0; + + for (i=share->base.max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH ; i-- ; ) + state->key_del[i]= HA_OFFSET_ERROR; + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; + + maria_log_command(MARIA_LOG_DELETE_ALL,info,(byte*) 0,0,0); + /* + If we are using delayed keys or if the user has done changes to the tables + since it was locked then there may be key blocks in the key cache + */ + flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED); + if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) || + my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) ) + goto err; + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); +#ifdef HAVE_MMAP + /* Resize mmaped area */ + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, (my_off_t)0); + rw_unlock(&info->s->mmap_lock); +#endif + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + { + int save_errno=my_errno; + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); + } +} /* maria_delete */ diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c new file mode 100644 index 00000000000..a9af9a62c99 --- /dev/null +++ b/storage/maria/ma_delete_table.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + deletes a table +*/ + +#include "ma_fulltext.h" + +int maria_delete_table(const char *name) +{ + char from[FN_REFLEN]; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + DBUG_ENTER("maria_delete_table"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif +#ifdef USE_RAID + { + MARIA_HA *info; + /* we use 'open_for_repair' to be able to delete a crashed table */ + if (!(info=maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) + DBUG_RETURN(my_errno); + raid_type = info->s->base.raid_type; + raid_chunks = info->s->base.raid_chunks; + maria_close(info); + } +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif +#endif /* USE_RAID */ + + fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + if (my_delete_with_symlink(from, MYF(MY_WME))) + DBUG_RETURN(my_errno); + fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME)) ? my_errno : 0); +#endif + DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME)) ? my_errno : 0); +} diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c new file mode 100644 index 00000000000..0fbf28b949a --- /dev/null +++ b/storage/maria/ma_dynrec.c @@ -0,0 +1,1811 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions to handle space-packed-records and blobs + + A row may be stored in one or more linked blocks. + The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH. + Each block is aligned on MARIA_DYN_ALIGN_SIZE. + The reson for the max block size is to not have too many different types + of blocks. For the differnet block types, look at _ma_get_block_info() +*/ + +#include "maria_def.h" + +/* Enough for comparing if number is zero */ +static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + +static int write_dynamic_record(MARIA_HA *info,const byte *record, + ulong reclength); +static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos, + ulong *length); +static int update_dynamic_record(MARIA_HA *info,my_off_t filepos,byte *record, + ulong reclength); +static int delete_dynamic_record(MARIA_HA *info,my_off_t filepos, + uint second_read); +static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length); + +#ifdef THREAD +/* Play it safe; We have a small stack when using threads */ +#undef my_alloca +#undef my_afree +#define my_alloca(A) my_malloc((A),MYF(0)) +#define my_afree(A) my_free((A),MYF(0)) +#endif + + /* Interface function from MARIA_HA */ + +#ifdef HAVE_MMAP + +/* + Create mmaped area for MARIA handler + + SYNOPSIS + _ma_dynmap_file() + info MARIA handler + + RETURN + 0 ok + 1 error. +*/ + +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size) +{ + DBUG_ENTER("_ma_dynmap_file"); + info->s->file_map= (byte*) + my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN), + info->s->mode==O_RDONLY ? PROT_READ : + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE, + info->dfile, 0L); + if (info->s->file_map == (byte*) MAP_FAILED) + { + info->s->file_map= NULL; + DBUG_RETURN(1); + } +#if defined(HAVE_MADVISE) + madvise(info->s->file_map, size, MADV_RANDOM); +#endif + info->s->mmaped_length= size; + DBUG_RETURN(0); +} + + +/* + Resize mmaped area for MARIA handler + + SYNOPSIS + _ma_remap_file() + info MARIA handler + + RETURN +*/ + +void _ma_remap_file(MARIA_HA *info, my_off_t size) +{ + if (info->s->file_map) + { + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); + _ma_dynmap_file(info, size); + } +} +#endif + + +/* + Read bytes from MySAM handler, using mmap or pread + + SYNOPSIS + _ma_mmap_pread() + info MARIA handler + Buffer Input buffer + Count Count of bytes for read + offset Start position + MyFlags + + RETURN + 0 ok +*/ + +uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. + */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(Buffer, info->s->file_map + offset, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pread(info->dfile, Buffer, Count, offset, MyFlags); + } +} + + + /* wrapper for my_pread in case if mmap isn't used */ + +uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pread(info->dfile, Buffer, Count, offset, MyFlags); +} + + +/* + Write bytes to MySAM handler, using mmap or pwrite + + SYNOPSIS + _ma_mmap_pwrite() + info MARIA handler + Buffer Output buffer + Count Count of bytes for write + offset Start position + MyFlags + + RETURN + 0 ok + !=0 error. In this case return error from pwrite +*/ + +uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. + */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(info->s->file_map + offset, Buffer, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + info->s->nonmmaped_inserts++; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags); + } + +} + + + /* wrapper for my_pwrite in case if mmap isn't used */ + +uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags); +} + + +int _ma_write_dynamic_record(MARIA_HA *info, const byte *record) +{ + ulong reclength= _ma_rec_pack(info,info->rec_buff,record); + return (write_dynamic_record(info,info->rec_buff,reclength)); +} + +int _ma_update_dynamic_record(MARIA_HA *info, my_off_t pos, const byte *record) +{ + uint length= _ma_rec_pack(info,info->rec_buff,record); + return (update_dynamic_record(info,pos,info->rec_buff,length)); +} + +int _ma_write_blob_record(MARIA_HA *info, const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,reclength2,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER+1); + reclength= (info->s->base.pack_reclength + + _ma_calc_total_blob_length(info,record)+ extra); +#ifdef NOT_USED /* We now support big rows */ + if (reclength > MARIA_DYN_MAX_ROW_LENGTH) + { + my_errno=HA_ERR_TO_BIG_ROW; + return -1; + } +#endif + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno=ENOMEM; + return(-1); + } + reclength2= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + DBUG_PRINT("info",("reclength: %lu reclength2: %lu", + reclength, reclength2)); + DBUG_ASSERT(reclength2 <= reclength); + error=write_dynamic_record(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength2); + my_afree(rec_buff); + return(error); +} + + +int _ma_update_blob_record(MARIA_HA *info, my_off_t pos, const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER); + reclength= (info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)+ extra); +#ifdef NOT_USED /* We now support big rows */ + if (reclength > MARIA_DYN_MAX_ROW_LENGTH) + { + my_errno=HA_ERR_TO_BIG_ROW; + return -1; + } +#endif + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno=ENOMEM; + return(-1); + } + reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + error=update_dynamic_record(info,pos, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength); + my_afree(rec_buff); + return(error); +} + + +int _ma_delete_dynamic_record(MARIA_HA *info) +{ + return delete_dynamic_record(info,info->lastpos,0); +} + + + /* Write record to data-file */ + +static int write_dynamic_record(MARIA_HA *info, const byte *record, + ulong reclength) +{ + int flag; + ulong length; + my_off_t filepos; + DBUG_ENTER("write_dynamic_record"); + + flag=0; + do + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + if (_ma_write_part_record(info,filepos,length, + (info->append_insert_at_end ? + HA_OFFSET_ERROR : info->s->state.dellink), + (byte**) &record,&reclength,&flag)) + goto err; + } while (reclength); + + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Get a block for data ; The given data-area must be used !! */ + +static int _ma_find_writepos(MARIA_HA *info, + ulong reclength, /* record length */ + my_off_t *filepos, /* Return file pos */ + ulong *length) /* length of block at filepos */ +{ + MARIA_BLOCK_INFO block_info; + ulong tmp; + DBUG_ENTER("_ma_find_writepos"); + + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + /* Deleted blocks exists; Get last used block */ + *filepos=info->s->state.dellink; + block_info.second_read=0; + info->rec_cache.seek_not_done=1; + if (!(_ma_get_block_info(&block_info,info->dfile,info->s->state.dellink) & + BLOCK_DELETED)) + { + DBUG_PRINT("error",("Delete link crashed")); + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(-1); + } + info->s->state.dellink=block_info.next_filepos; + info->state->del--; + info->state->empty-= block_info.block_len; + *length= block_info.block_len; + } + else + { + /* No deleted blocks; Allocate a new block */ + *filepos=info->state->data_file_length; + if ((tmp=reclength+3 + test(reclength >= (65520-3))) < + info->s->base.min_block_length) + tmp= info->s->base.min_block_length; + else + tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) & + (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))); + if (info->state->data_file_length > + (info->s->base.max_data_file_length - tmp)) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + DBUG_RETURN(-1); + } + if (tmp > MARIA_MAX_BLOCK_LENGTH) + tmp=MARIA_MAX_BLOCK_LENGTH; + *length= tmp; + info->state->data_file_length+= tmp; + info->s->state.split++; + info->update|=HA_STATE_WRITE_AT_END; + } + DBUG_RETURN(0); +} /* _ma_find_writepos */ + + + +/* + Unlink a deleted block from the deleted list. + This block will be combined with the preceding or next block to form + a big block. +*/ + +static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info) +{ + DBUG_ENTER("unlink_deleted_block"); + if (block_info->filepos == info->s->state.dellink) + { + /* First deleted block; We can just use this ! */ + info->s->state.dellink=block_info->next_filepos; + } + else + { + MARIA_BLOCK_INFO tmp; + tmp.second_read=0; + /* Unlink block from the previous block */ + if (!(_ma_get_block_info(&tmp,info->dfile,block_info->prev_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+4,block_info->next_filepos); + if (info->s->file_write(info,(char*) tmp.header+4,8, + block_info->prev_filepos+4, MYF(MY_NABP))) + DBUG_RETURN(1); + /* Unlink block from next block */ + if (block_info->next_filepos != HA_OFFSET_ERROR) + { + if (!(_ma_get_block_info(&tmp,info->dfile,block_info->next_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+12,block_info->prev_filepos); + if (info->s->file_write(info,(char*) tmp.header+12,8, + block_info->next_filepos+12, + MYF(MY_NABP))) + DBUG_RETURN(1); + } + } + /* We now have one less deleted block */ + info->state->del--; + info->state->empty-= block_info->block_len; + info->s->state.split--; + + /* + If this was a block that we where accessing through table scan + (maria_rrnd() or maria_scan(), then ensure that we skip over this block + when doing next maria_rrnd() or maria_scan(). + */ + if (info->nextpos == block_info->filepos) + info->nextpos+=block_info->block_len; + DBUG_RETURN(0); +} + + +/* + Add a backward link to delete block + + SYNOPSIS + update_backward_delete_link() + info MARIA handler + delete_block Position to delete block to update. + If this is 'HA_OFFSET_ERROR', nothing will be done + filepos Position to block that 'delete_block' should point to + + RETURN + 0 ok + 1 error. In this case my_error is set. +*/ + +static int update_backward_delete_link(MARIA_HA *info, my_off_t delete_block, + my_off_t filepos) +{ + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_backward_delete_link"); + + if (delete_block != HA_OFFSET_ERROR) + { + block_info.second_read=0; + if (_ma_get_block_info(&block_info,info->dfile,delete_block) + & BLOCK_DELETED) + { + char buff[8]; + mi_sizestore(buff,filepos); + if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP))) + DBUG_RETURN(1); /* Error on write */ + } + else + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); /* Wrong delete link */ + } + } + DBUG_RETURN(0); +} + + /* Delete datarecord from database */ + /* info->rec_cache.seek_not_done is updated in cmp_record */ + +static int delete_dynamic_record(MARIA_HA *info, my_off_t filepos, + uint second_read) +{ + uint length,b_type; + MARIA_BLOCK_INFO block_info,del_block; + int error; + my_bool remove_next_block; + DBUG_ENTER("delete_dynamic_record"); + + /* First add a link from the last block to the new one */ + error= update_backward_delete_link(info, info->s->state.dellink, filepos); + + block_info.second_read=second_read; + do + { + /* Remove block at 'filepos' */ + if ((b_type= _ma_get_block_info(&block_info,info->dfile,filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR) || + (length=(uint) (block_info.filepos-filepos) +block_info.block_len) < + MARIA_MIN_BLOCK_LENGTH) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + /* Check if next block is a delete block */ + del_block.second_read=0; + remove_next_block=0; + if (_ma_get_block_info(&del_block,info->dfile,filepos+length) & + BLOCK_DELETED && del_block.block_len+length < MARIA_DYN_MAX_BLOCK_LENGTH) + { + /* We can't remove this yet as this block may be the head block */ + remove_next_block=1; + length+=del_block.block_len; + } + + block_info.header[0]=0; + mi_int3store(block_info.header+1,length); + mi_sizestore(block_info.header+4,info->s->state.dellink); + if (b_type & BLOCK_LAST) + bfill(block_info.header+12,8,255); + else + mi_sizestore(block_info.header+12,block_info.next_filepos); + if (info->s->file_write(info,(byte*) block_info.header,20,filepos, + MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink = filepos; + info->state->del++; + info->state->empty+=length; + filepos=block_info.next_filepos; + + /* Now it's safe to unlink the deleted block directly after this one */ + if (remove_next_block && unlink_deleted_block(info,&del_block)) + error=1; + } while (!(b_type & BLOCK_LAST)); + + DBUG_RETURN(error); +} + + + /* Write a block to datafile */ + +int _ma_write_part_record(MARIA_HA *info, + my_off_t filepos, /* points at empty block */ + ulong length, /* length of block */ + my_off_t next_filepos,/* Next empty block */ + byte **record, /* pointer to record ptr */ + ulong *reclength, /* length of *record */ + int *flag) /* *flag == 0 if header */ +{ + ulong head_length,res_length,extra_length,long_block,del_length; + byte *pos,*record_end; + my_off_t next_delete_block; + uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER]; + DBUG_ENTER("_ma_write_part_record"); + + next_delete_block=HA_OFFSET_ERROR; + + res_length=extra_length=0; + if (length > *reclength + MARIA_SPLIT_LENGTH) + { /* Splitt big block */ + res_length=MY_ALIGN(length- *reclength - MARIA_EXTEND_BLOCK_LENGTH, + MARIA_DYN_ALIGN_SIZE); + length-= res_length; /* Use this for first part */ + } + long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1; + if (length == *reclength+ 3 + long_block) + { + /* Block is exactly of the right length */ + temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + head_length=4; + } + else + { + mi_int2store(temp+1,*reclength); + head_length=3; + } + } + else if (length-long_block < *reclength+4) + { /* To short block */ + if (next_filepos == HA_OFFSET_ERROR) + next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end ? + info->s->state.dellink : info->state->data_file_length); + if (*flag == 0) /* First block */ + { + if (*reclength > MARIA_MAX_BLOCK_LENGTH) + { + head_length= 16; + temp[0]=13; + mi_int4store(temp+1,*reclength); + mi_int3store(temp+5,length-head_length); + mi_sizestore((byte*) temp+8,next_filepos); + } + else + { + head_length=5+8+long_block*2; + temp[0]=5+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,*reclength); + mi_int3store(temp+4,length-head_length); + mi_sizestore((byte*) temp+7,next_filepos); + } + else + { + mi_int2store(temp+1,*reclength); + mi_int2store(temp+3,length-head_length); + mi_sizestore((byte*) temp+5,next_filepos); + } + } + } + else + { + head_length=3+8+long_block; + temp[0]=11+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,length-head_length); + mi_sizestore((byte*) temp+4,next_filepos); + } + else + { + mi_int2store(temp+1,length-head_length); + mi_sizestore((byte*) temp+3,next_filepos); + } + } + } + else + { /* Block with empty info last */ + head_length=4+long_block; + extra_length= length- *reclength-head_length; + temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + temp[4]= (uchar) (extra_length); + } + else + { + mi_int2store(temp+1,*reclength); + temp[3]= (uchar) (extra_length); + } + length= *reclength+head_length; /* Write only what is needed */ + } + DBUG_DUMP("header",(byte*) temp,head_length); + + /* Make a long block for one write */ + record_end= *record+length-head_length; + del_length=(res_length ? MARIA_DYN_DELETE_BLOCK_HEADER : 0); + bmove((byte*) (*record-head_length),(byte*) temp,head_length); + memcpy(temp,record_end,(size_t) (extra_length+del_length)); + bzero((byte*) record_end,extra_length); + + if (res_length) + { + /* Check first if we can join this block with the next one */ + MARIA_BLOCK_INFO del_block; + my_off_t next_block=filepos+length+extra_length+res_length; + + del_block.second_read=0; + if (next_block < info->state->data_file_length && + info->s->state.dellink != HA_OFFSET_ERROR) + { + if ((_ma_get_block_info(&del_block,info->dfile,next_block) + & BLOCK_DELETED) && + res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH) + { + if (unlink_deleted_block(info,&del_block)) + goto err; + res_length+=del_block.block_len; + } + } + + /* Create a delete link of the last part of the block */ + pos=record_end+extra_length; + pos[0]= '\0'; + mi_int3store(pos+1,res_length); + mi_sizestore(pos+4,info->s->state.dellink); + bfill(pos+12,8,255); /* End link */ + next_delete_block=info->s->state.dellink; + info->s->state.dellink= filepos+length+extra_length; + info->state->del++; + info->state->empty+=res_length; + info->s->state.split++; + } + if (info->opt_flag & WRITE_CACHE_USED && + info->update & HA_STATE_WRITE_AT_END) + { + if (info->update & HA_STATE_EXTEND_BLOCK) + { + info->update&= ~HA_STATE_EXTEND_BLOCK; + if (my_block_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length,filepos)) + goto err; + } + else if (my_b_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length)) + goto err; + } + else + { + info->rec_cache.seek_not_done=1; + if (info->s->file_write(info,(byte*) *record-head_length,length+extra_length+ + del_length,filepos,info->s->write_flag)) + goto err; + } + memcpy(record_end,temp,(size_t) (extra_length+del_length)); + *record=record_end; + *reclength-=(length-head_length); + *flag=6; + + if (del_length) + { + /* link the next delete block to this */ + if (update_backward_delete_link(info, next_delete_block, + info->s->state.dellink)) + goto err; + } + + DBUG_RETURN(0); +err: + DBUG_PRINT("exit",("errno: %d",my_errno)); + DBUG_RETURN(1); +} /* _ma_write_part_record */ + + + /* update record from datafile */ + +static int update_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *record, + ulong reclength) +{ + int flag; + uint error; + ulong length; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_dynamic_record"); + + flag=block_info.second_read=0; + while (reclength > 0) + { + if (filepos != info->s->state.dellink) + { + block_info.next_filepos= HA_OFFSET_ERROR; + if ((error= _ma_get_block_info(&block_info,info->dfile,filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + DBUG_PRINT("error",("Got wrong block info")); + if (!(error & BLOCK_FATAL_ERROR)) + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + length=(ulong) (block_info.filepos-filepos) + block_info.block_len; + if (length < reclength) + { + uint tmp=MY_ALIGN(reclength - length + 3 + + test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE); + /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */ + tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length; + /* Check if we can extend this block */ + if (block_info.filepos + block_info.block_len == + info->state->data_file_length && + info->state->data_file_length < + info->s->base.max_data_file_length-tmp) + { + /* extend file */ + DBUG_PRINT("info",("Extending file with %d bytes",tmp)); + if (info->nextpos == info->state->data_file_length) + info->nextpos+= tmp; + info->state->data_file_length+= tmp; + info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK; + length+=tmp; + } + else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH) + { + /* + Check if next block is a deleted block + Above we have MARIA_MIN_BLOCK_LENGTH to avoid the problem where + the next block is so small it can't be splited which could + casue problems + */ + + MARIA_BLOCK_INFO del_block; + del_block.second_read=0; + if (_ma_get_block_info(&del_block,info->dfile, + block_info.filepos + block_info.block_len) & + BLOCK_DELETED) + { + /* Use; Unlink it and extend the current block */ + DBUG_PRINT("info",("Extending current block")); + if (unlink_deleted_block(info,&del_block)) + goto err; + if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH) + { + /* + New block was too big, link overflow part back to + delete list + */ + my_off_t next_pos; + ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH; + set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH); + next_pos= del_block.filepos+ del_block.block_len - rest_length; + + if (update_backward_delete_link(info, info->s->state.dellink, + next_pos)) + DBUG_RETURN(1); + + /* create delete link for data that didn't fit into the page */ + del_block.header[0]=0; + mi_int3store(del_block.header+1, rest_length); + mi_sizestore(del_block.header+4,info->s->state.dellink); + bfill(del_block.header+12,8,255); + if (info->s->file_write(info,(byte*) del_block.header,20, next_pos, + MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink= next_pos; + info->s->state.split++; + info->state->del++; + info->state->empty+= rest_length; + length-= rest_length; + } + } + } + } + } + else + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + } + if (_ma_write_part_record(info,filepos,length,block_info.next_filepos, + &record,&reclength,&flag)) + goto err; + if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR) + { + /* Start writing data on deleted blocks */ + filepos=info->s->state.dellink; + } + } + + if (block_info.next_filepos != HA_OFFSET_ERROR) + if (delete_dynamic_record(info,block_info.next_filepos,1)) + goto err; + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Pack a record. Return new reclength */ + +uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*startpos,*packpos; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *rec; + MARIA_BLOB *blob; + DBUG_ENTER("_ma_rec_pack"); + + flag=0 ; bit=1; + startpos=packpos=to; to+= info->s->base.pack_bits; blob=info->blobs; + rec=info->s->rec; + + for (i=info->s->base.fields ; i-- > 0; from+= length,rec++) + { + length=(uint) rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + if (!blob->length) + flag|=bit; + else + { + char *temp_pos; + size_t tmp_length=length-maria_portable_sizeof_char_ptr; + memcpy((byte*) to,from,tmp_length); + memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*)); + memcpy(to+tmp_length,temp_pos,(size_t) blob->length); + to+=tmp_length+blob->length; + } + blob++; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) from,zero_string,length) == 0) + flag|=bit; + else + { + memcpy((byte*) to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) from; end= (byte*) from + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > from && *(end-1) == ' ') + end--; + } + else + { /* Pack pref-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(rec->length > 255 && new_length > 127) + < length) + { + if (rec->length > 255 && new_length > 127) + { + to[0]=(char) ((new_length & 127)+128); + to[1]=(char) (new_length >> 7); + to+=2; + } + else + *to++= (char) new_length; + memcpy((byte*) to,pos,(size_t) new_length); to+=new_length; + flag|=bit; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) from; + *to++= *from; + } + else + { + tmp_length= uint2korr(from); + store_key_length_inc(to,tmp_length); + } + memcpy(to, from+pack_length,tmp_length); + to+= tmp_length; + continue; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + *packpos++ = (char) (uchar) flag; + bit=1; flag=0; + } + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + if (bit != 1) + *packpos= (char) (uchar) flag; + if (info->s->calc_checksum) + *to++=(char) info->checksum; + DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos))); + DBUG_RETURN((uint) (to-startpos)); +} /* _ma_rec_pack */ + + + +/* + Check if a record was correctly packed. Used only by mariachk + Returns 0 if record is ok. +*/ + +my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, + ulong packed_length, my_bool with_checksum) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*packpos,*to; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *rec; + DBUG_ENTER("_ma_rec_check"); + + packpos=rec_buff; to= rec_buff+info->s->base.pack_bits; + rec=info->s->rec; + flag= *packpos; bit=1; + + for (i=info->s->base.fields ; i-- > 0; record+= length, rec++) + { + length=(uint) rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + uint blob_length= + _ma_calc_blob_length(length-maria_portable_sizeof_char_ptr,record); + if (!blob_length && !(flag & bit)) + goto err; + if (blob_length) + to+=length - maria_portable_sizeof_char_ptr+ blob_length; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) record,zero_string,length) == 0) + { + if (!(flag & bit)) + goto err; + } + else + to+=length; + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) record; end= (byte*) record + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > record && *(end-1) == ' ') + end--; + } + else + { /* Pack pre-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(rec->length > 255 && new_length > 127) + < length) + { + if (!(flag & bit)) + goto err; + if (rec->length > 255 && new_length > 127) + { + if (to[0] != (char) ((new_length & 127)+128) || + to[1] != (char) (new_length >> 7)) + goto err; + to+=2; + } + else if (*to++ != (char) new_length) + goto err; + to+=new_length; + } + else + to+=length; + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) record; + to+= 1+ tmp_length; + continue; + } + else + { + tmp_length= uint2korr(record); + to+= get_pack_length(tmp_length)+tmp_length; + } + continue; + } + else + { + to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + flag= *++packpos; + bit=1; + } + } + else + to+= length; + } + if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || + (bit != 1 && (flag & ~(bit - 1)))) + goto err; + if (with_checksum && ((uchar) info->checksum != (uchar) *to)) + { + DBUG_PRINT("error",("wrong checksum for row")); + goto err; + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + + + /* Unpacks a record */ + /* Returns -1 and my_errno =HA_ERR_RECORD_DELETED if reclength isn't */ + /* right. Returns reclength (>0) if ok */ + +ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, + ulong found_length) +{ + uint flag,bit,length,rec_length,min_pack_length; + enum en_fieldtype type; + byte *from_end,*to_end,*packpos; + reg3 MARIA_COLUMNDEF *rec,*end_field; + DBUG_ENTER("_ma_rec_unpack"); + + to_end=to + info->s->base.reclength; + from_end=from+found_length; + flag= (uchar) *from; bit=1; packpos=from; + if (found_length < info->s->base.min_pack_length) + goto err; + from+= info->s->base.pack_bits; + min_pack_length=info->s->base.min_pack_length - info->s->base.pack_bits; + + for (rec=info->s->rec , end_field=rec+info->s->base.fields ; + rec < end_field ; to+= rec_length, rec++) + { + rec_length=rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL && + (type != FIELD_CHECK)) + { + if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec_length-1); + if (pack_length == 1) + { + length= (uint) *(uchar*) from; + if (length > rec_length-1) + goto err; + *to= *from++; + } + else + { + get_key_length(length, from); + if (length > rec_length-2) + goto err; + int2store(to,length); + } + if (from+length > from_end) + goto err; + memcpy(to+pack_length, from, length); + from+= length; + min_pack_length--; + continue; + } + if (flag & bit) + { + if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO) + bzero((byte*) to,rec_length); + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + if (rec->length > 255 && *from & 128) + { + if (from + 1 >= from_end) + goto err; + length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2; + } + else + { + if (from == from_end) + goto err; + length= (uchar) *from++; + } + min_pack_length--; + if (length >= rec_length || + min_pack_length + length > (uint) (from_end - from)) + goto err; + if (type == FIELD_SKIP_ENDSPACE) + { + memcpy(to,(byte*) from,(size_t) length); + bfill((byte*) to+length,rec_length-length,' '); + } + else + { + bfill((byte*) to,rec_length-length,' '); + memcpy(to+rec_length-length,(byte*) from,(size_t) length); + } + from+=length; + } + } + else if (type == FIELD_BLOB) + { + uint size_length=rec_length- maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length,from); + if ((ulong) (from_end-from) - size_length < blob_length || + min_pack_length > (uint) (from_end -(from+size_length+blob_length))) + goto err; + memcpy((byte*) to,(byte*) from,(size_t) size_length); + from+=size_length; + memcpy_fixed((byte*) to+size_length,(byte*) &from,sizeof(char*)); + from+=blob_length; + } + else + { + if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) + min_pack_length--; + if (min_pack_length + rec_length > (uint) (from_end - from)) + goto err; + memcpy(to,(byte*) from,(size_t) rec_length); from+=rec_length; + } + if ((bit= bit << 1) >= 256) + { + flag= (uchar) *++packpos; bit=1; + } + } + else + { + if (min_pack_length > (uint) (from_end - from)) + goto err; + min_pack_length-=rec_length; + memcpy(to, (byte*) from, (size_t) rec_length); + from+=rec_length; + } + } + if (info->s->calc_checksum) + from++; + if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1)))) + DBUG_RETURN(found_length); + +err: + my_errno= HA_ERR_WRONG_IN_RECORD; + DBUG_PRINT("error",("to_end: %lx -> %lx from_end: %lx -> %lx", + to,to_end,from,from_end)); + DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); + DBUG_RETURN(MY_FILE_ERROR); +} /* _ma_rec_unpack */ + + + /* Calc length of blob. Update info in blobs->length */ + +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record) +{ + ulong length; + MARIA_BLOB *blob,*end; + + for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ; + blob != end; + blob++) + { + blob->length= _ma_calc_blob_length(blob->pack_length,record + blob->offset); + length+=blob->length; + } + return length; +} + + +ulong _ma_calc_blob_length(uint length, const byte *pos) +{ + switch (length) { + case 1: + return (uint) (uchar) *pos; + case 2: + return (uint) uint2korr(pos); + case 3: + return uint3korr(pos); + case 4: + return uint4korr(pos); + default: + break; + } + return 0; /* Impossible */ +} + + +void _ma_store_blob_length(byte *pos,uint pack_length,uint length) +{ + switch (pack_length) { + case 1: + *pos= (uchar) length; + break; + case 2: + int2store(pos,length); + break; + case 3: + int3store(pos,length); + break; + case 4: + int4store(pos,length); + default: + break; + } + return; +} + + + /* Read record from datafile */ + /* Returns 0 if ok, -1 if error */ + +int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) +{ + int flag; + uint b_type,left_length; + byte *to; + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("maria_read_dynamic_record"); + + if (filepos != HA_OFFSET_ERROR) + { + LINT_INIT(to); + LINT_INIT(left_length); + file=info->dfile; + block_info.next_filepos=filepos; /* for easyer loop */ + flag=block_info.second_read=0; + do + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file <= block_info.next_filepos && + flush_io_cache(&info->rec_cache)) + goto err; + info->rec_cache.seek_not_done=1; + if ((b_type= _ma_get_block_info(&block_info,file, + block_info.next_filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_DELETED; + goto err; + } + if (flag == 0) /* First block */ + { + flag=1; + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + goto panic; + if (info->s->base.blobs) + { + if (!(to=_ma_alloc_rec_buff(info, block_info.rec_len, + &info->rec_buff))) + goto err; + } + else + to= info->rec_buff; + left_length=block_info.rec_len; + } + if (left_length < block_info.data_len || ! block_info.data_len) + goto panic; /* Wrong linked record */ + if (info->s->file_read(info,(byte*) to,block_info.data_len,block_info.filepos, + MYF(MY_NABP))) + goto panic; + left_length-=block_info.data_len; + to+=block_info.data_len; + } while (left_length); + + info->update|= HA_STATE_AKTIV; /* We have a aktive record */ + fast_ma_writeinfo(info); + DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR ? 0 : -1); + } + fast_ma_writeinfo(info); + DBUG_RETURN(-1); /* Wrong data to read */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(-1); +} + + /* compare unique constraint between stored rows */ + +int _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, my_off_t pos) +{ + byte *rec_buff,*old_record; + int error; + DBUG_ENTER("_ma_cmp_dynamic_unique"); + + if (!(old_record=my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + rec_buff=info->rec_buff; + if (info->s->base.blobs) + info->rec_buff=0; + error= _ma_read_dynamic_record(info,pos,old_record); + if (!error) + error=_ma_unique_comp(def, record, old_record, def->null_are_equal); + if (info->s->base.blobs) + { + my_free(_ma_get_rec_buff_ptr(info, info->rec_buff), MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff=rec_buff; + } + my_afree(old_record); + DBUG_RETURN(error); +} + + + /* Compare of record one disk with packed record in memory */ + +int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) +{ + uint flag,reclength,b_type; + my_off_t filepos; + byte *buffer; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("_ma_cmp_dynamic_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK); + if (flush_io_cache(&info->rec_cache)) + DBUG_RETURN(-1); + } + info->rec_cache.seek_not_done=1; + + /* If nobody have touched the database we don't have to test rec */ + + buffer=info->rec_buff; + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + if (info->s->base.blobs) + { + if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)))) + DBUG_RETURN(-1); + } + reclength= _ma_rec_pack(info,buffer,record); + record= buffer; + + filepos=info->lastpos; + flag=block_info.second_read=0; + block_info.next_filepos=filepos; + while (reclength > 0) + { + if ((b_type= _ma_get_block_info(&block_info,info->dfile, + block_info.next_filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + if (flag == 0) /* First block */ + { + flag=1; + if (reclength != block_info.rec_len) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + } else if (reclength < block_info.data_len) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + reclength-=block_info.data_len; + if (_ma_cmp_buffer(info->dfile,record,block_info.filepos, + block_info.data_len)) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + flag=1; + record+=block_info.data_len; + } + } + my_errno=0; +err: + if (buffer != info->rec_buff) + my_afree((gptr) buffer); + DBUG_RETURN(my_errno); +} + + + /* Compare file to buffert */ + +static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length) +{ + uint next_length; + char temp_buff[IO_SIZE*2]; + DBUG_ENTER("_ma_cmp_buffer"); + + next_length= IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1)); + + while (length > IO_SIZE*2) + { + if (my_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) || + memcmp((byte*) buff,temp_buff,next_length)) + goto err; + filepos+=next_length; + buff+=next_length; + length-= next_length; + next_length=IO_SIZE*2; + } + if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP))) + goto err; + DBUG_RETURN(memcmp((byte*) buff,temp_buff,length)); +err: + DBUG_RETURN(1); +} + + +int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, + register my_off_t filepos, + my_bool skip_deleted_blocks) +{ + int flag,info_read,save_errno; + uint left_len,b_type; + byte *to; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_dynamic_record"); + + info_read=0; + LINT_INIT(to); + + if (info->lock_type == F_UNLCK) + { +#ifndef UNSAFE_LOCKING + if (share->tot_locks == 0) + { + if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, + MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) + DBUG_RETURN(my_errno); + } +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + else + info_read=1; /* memory-keyinfoblock is ok */ + + flag=block_info.second_read=0; + left_len=1; + do + { + if (filepos >= info->state->data_file_length) + { + if (!info_read) + { /* Check if changed */ + info_read=1; + info->rec_cache.seek_not_done=1; + if (_ma_state_info_read_dsk(share->kfile,&share->state,1)) + goto panic; + } + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + } + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, + sizeof(block_info.header), + (!flag && skip_deleted_blocks ? READING_NEXT : 0) | + READING_HEADER)) + goto panic; + b_type= _ma_get_block_info(&block_info,-1,filepos); + } + else + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file <= filepos && + flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + info->rec_cache.seek_not_done=1; + b_type= _ma_get_block_info(&block_info,info->dfile,filepos); + } + + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + && skip_deleted_blocks) + { + filepos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; /* Search after next_record */ + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + my_errno=HA_ERR_RECORD_DELETED; + info->lastpos=block_info.filepos; + info->nextpos=block_info.filepos+block_info.block_len; + } + goto err; + } + if (flag == 0) /* First block */ + { + if (block_info.rec_len > (uint) share->base.max_pack_length) + goto panic; + info->lastpos=filepos; + if (share->base.blobs) + { + if (!(to= _ma_alloc_rec_buff(info, block_info.rec_len, + &info->rec_buff))) + goto err; + } + else + to= info->rec_buff; + left_len=block_info.rec_len; + } + if (left_len < block_info.data_len) + goto panic; /* Wrong linked record */ + + /* copy information that is already read */ + { + uint offset=(uint) (block_info.filepos - filepos); + uint tmp_length= (sizeof(block_info.header) - offset); + filepos=block_info.filepos; + + if (tmp_length > block_info.data_len) + tmp_length= block_info.data_len; + if (tmp_length) + { + memcpy((byte*) to, block_info.header+offset,tmp_length); + block_info.data_len-=tmp_length; + left_len-=tmp_length; + to+=tmp_length; + filepos+=tmp_length; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) to,filepos, + block_info.data_len, + (!flag && skip_deleted_blocks) ? READING_NEXT :0)) + goto panic; + } + else + { + /* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */ + if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP))) + { + if (my_errno == -1) + my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */ + goto err; + } + } + } + if (flag++ == 0) + { + info->nextpos=block_info.filepos+block_info.block_len; + skip_deleted_blocks=0; + } + left_len-=block_info.data_len; + to+=block_info.data_len; + filepos=block_info.next_filepos; + } while (left_len); + + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + fast_ma_writeinfo(info); + if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR) + DBUG_RETURN(0); + DBUG_RETURN(my_errno); /* Wrong record */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */ +err: + save_errno=my_errno; + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(my_errno=save_errno); +} + + + /* Read and process header from a dynamic-record-file */ + +uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) +{ + uint return_val=0; + uchar *header=info->header; + + if (file >= 0) + { + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) != + sizeof(info->header)) + goto err; + } + DBUG_DUMP("header",(byte*) header,MARIA_BLOCK_INFO_HEADER_LENGTH); + if (info->second_read) + { + if (info->header[0] <= 6 || info->header[0] == 13) + return_val=BLOCK_SYNC_ERROR; + } + else + { + if (info->header[0] > 6 && info->header[0] != 13) + return_val=BLOCK_SYNC_ERROR; + } + info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */ + + switch (info->header[0]) { + case 0: + if ((info->block_len=(uint) mi_uint3korr(header+1)) < + MARIA_MIN_BLOCK_LENGTH || + (info->block_len & (MARIA_DYN_ALIGN_SIZE -1))) + goto err; + info->filepos=filepos; + info->next_filepos=mi_sizekorr(header+4); + info->prev_filepos=mi_sizekorr(header+12); +#if SIZEOF_OFF_T == 4 + if ((mi_uint4korr(header+4) != 0 && + (mi_uint4korr(header+4) != (ulong) ~0 || + info->next_filepos != (ulong) ~0)) || + (mi_uint4korr(header+12) != 0 && + (mi_uint4korr(header+12) != (ulong) ~0 || + info->prev_filepos != (ulong) ~0))) + goto err; +#endif + return return_val | BLOCK_DELETED; /* Deleted block */ + + case 1: + info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 2: + info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 13: + info->rec_len=mi_uint4korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+5); + info->next_filepos=mi_sizekorr(header+8); + info->second_read=1; + info->filepos=filepos+16; + return return_val | BLOCK_FIRST; + + case 3: + info->rec_len=info->data_len=mi_uint2korr(header+1); + info->block_len=info->rec_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 4: + info->rec_len=info->data_len=mi_uint3korr(header+1); + info->block_len=info->rec_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 5: + info->rec_len=mi_uint2korr(header+1); + info->block_len=info->data_len=mi_uint2korr(header+3); + info->next_filepos=mi_sizekorr(header+5); + info->second_read=1; + info->filepos=filepos+13; + return return_val | BLOCK_FIRST; + case 6: + info->rec_len=mi_uint3korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+4); + info->next_filepos=mi_sizekorr(header+7); + info->second_read=1; + info->filepos=filepos+15; + return return_val | BLOCK_FIRST; + + /* The following blocks are identical to 1-6 without rec_len */ + case 7: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_LAST; + case 8: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + + case 9: + info->data_len=mi_uint2korr(header+1); + info->block_len=info->data_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + case 10: + info->data_len=mi_uint3korr(header+1); + info->block_len=info->data_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_LAST; + + case 11: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->next_filepos=mi_sizekorr(header+3); + info->second_read=1; + info->filepos=filepos+11; + return return_val; + case 12: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->next_filepos=mi_sizekorr(header+4); + info->second_read=1; + info->filepos=filepos+12; + return return_val; + } + +err: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */ + return BLOCK_ERROR; +} diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c new file mode 100644 index 00000000000..bd5d4280c9d --- /dev/null +++ b/storage/maria/ma_extra.c @@ -0,0 +1,426 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function); + + +/* + Set options and buffers to optimize table handling + + SYNOPSIS + maria_extra() + info open table + function operation + extra_arg Pointer to extra argument (normally pointer to ulong) + Used when function is one of: + HA_EXTRA_WRITE_CACHE + HA_EXTRA_CACHE + RETURN VALUES + 0 ok + # error +*/ + +int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg) +{ + int error=0; + ulong cache_size; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_extra"); + DBUG_PRINT("enter",("function: %d",(int) function)); + + switch (function) { + case HA_EXTRA_RESET: + /* + Free buffers and reset the following flags: + EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK + + If the row buffer cache is large (for dynamic tables), reduce it + to save memory. + */ + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + } + if (share->base.blobs) + _ma_alloc_rec_buff(info, -1, &info->rec_buff); +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + info->quick_mode=0; + /* Fall through */ + + case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage=info->lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + /* Next/prev gives first/last */ + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED) + ); + } + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + break; + case HA_EXTRA_CACHE: + if (info->lock_type == F_UNLCK && + (share->options & HA_OPTION_PACK_RECORD)) + { + error=1; /* Not possibly if not locked */ + my_errno=EACCES; + break; + } + if (info->s->file_map) /* Don't use cache if mmap */ + break; +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if ((share->options & HA_OPTION_COMPRESS_RECORD)) + { + pthread_mutex_lock(&share->intern_lock); + if (_ma_memmap_file(info)) + { + /* We don't nead MADV_SEQUENTIAL if small file */ + madvise(share->file_map,share->state.state.data_file_length, + share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ? + MADV_RANDOM : MADV_SEQUENTIAL); + pthread_mutex_unlock(&share->intern_lock); + break; + } + pthread_mutex_unlock(&share->intern_lock); + } +#endif + if (info->opt_flag & WRITE_CACHE_USED) + { + info->opt_flag&= ~WRITE_CACHE_USED; + if ((error=end_io_cache(&info->rec_cache))) + break; + } + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED))) + { + cache_size= (extra_arg ? *(ulong*) extra_arg : + my_default_record_cache_size); + if (!(init_io_cache(&info->rec_cache,info->dfile, + (uint) min(info->state->data_file_length+1, + cache_size), + READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=READ_CACHE_USED; + info->update&= ~HA_STATE_ROW_CHANGED; + } + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_REINIT_CACHE: + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache,READ_CACHE,info->nextpos, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED)); + info->update&= ~HA_STATE_ROW_CHANGED; + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_WRITE_CACHE: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not locked */ + break; + } + + cache_size= (extra_arg ? *(ulong*) extra_arg : + my_default_record_cache_size); + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) && + !share->state.header.uniques) + if (!(init_io_cache(&info->rec_cache,info->dfile, cache_size, + WRITE_CACHE,info->state->data_file_length, + (pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=WRITE_CACHE_USED; + info->update&= ~(HA_STATE_ROW_CHANGED | + HA_STATE_WRITE_AT_END | + HA_STATE_EXTEND_BLOCK); + } + break; + case HA_EXTRA_PREPARE_FOR_UPDATE: + if (info->s->data_file_type != DYNAMIC_RECORD) + break; + /* Remove read/write cache if dynamic rows */ + case HA_EXTRA_NO_CACHE: + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + /* Sergei will insert full text index caching here */ + } +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + break; + case HA_EXTRA_FLUSH_CACHE: + if (info->opt_flag & WRITE_CACHE_USED) + { + if ((error=flush_io_cache(&info->rec_cache))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + break; + case HA_EXTRA_NO_READCHECK: + info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */ + break; + case HA_EXTRA_READCHECK: + info->opt_flag|= READ_CHECK_USED; + break; + case HA_EXTRA_KEYREAD: /* Read only keys to record */ + case HA_EXTRA_REMEMBER_POS: + info->opt_flag |= REMEMBER_OLD_POS; + bmove((byte*) info->lastkey+share->base.max_key_length*2, + (byte*) info->lastkey,info->lastkey_length); + info->save_update= info->update; + info->save_lastinx= info->lastinx; + info->save_lastpos= info->lastpos; + info->save_lastkey_length=info->lastkey_length; + if (function == HA_EXTRA_REMEMBER_POS) + break; + /* fall through */ + case HA_EXTRA_KEYREAD_CHANGE_POS: + info->opt_flag |= KEY_READ_USED; + info->read_record= _ma_read_key_record; + break; + case HA_EXTRA_NO_KEYREAD: + case HA_EXTRA_RESTORE_POS: + if (info->opt_flag & REMEMBER_OLD_POS) + { + bmove((byte*) info->lastkey, + (byte*) info->lastkey+share->base.max_key_length*2, + info->save_lastkey_length); + info->update= info->save_update | HA_STATE_WRITTEN; + info->lastinx= info->save_lastinx; + info->lastpos= info->save_lastpos; + info->lastkey_length=info->save_lastkey_length; + } + info->read_record= share->read_record; + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + break; + case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked agains changes */ + info->lock_type= F_EXTRA_LCK; /* Simulate as locked */ + break; + case HA_EXTRA_WAIT_LOCK: + info->lock_wait=0; + break; + case HA_EXTRA_NO_WAIT_LOCK: + info->lock_wait=MY_DONT_WAIT; + break; + case HA_EXTRA_NO_KEYS: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not lock */ + break; + } + if (maria_is_any_key_active(share->state.key_map)) + { + MARIA_KEYDEF *key=share->keyinfo; + uint i; + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } + + if (!share->changed) + { + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; + share->changed=1; /* Update on close */ + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + } + share->state.state= *info->state; + error=_ma_state_info_write(share->kfile,&share->state,1 | 2); + } + break; + case HA_EXTRA_FORCE_REOPEN: + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_PREPARE_FOR_DELETE: + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ +#ifdef __WIN__ + /* Close the isam and data files as Win32 can't drop an open table */ + pthread_mutex_lock(&share->intern_lock); + if (flush_key_blocks(share->key_cache, share->kfile, + (function == HA_EXTRA_FORCE_REOPEN ? + FLUSH_RELEASE : FLUSH_IGNORE_CHANGED))) + { + error=my_errno; + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + } + if (info->lock_type != F_UNLCK && ! info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + info->lock_type = F_UNLCK; + } + if (share->kfile >= 0) + _ma_decrement_open_count(info); + if (share->kfile >= 0 && my_close(share->kfile,MYF(0))) + error=my_errno; + { + LIST *list_element ; + for (list_element=maria_open_list ; + list_element ; + list_element=list_element->next) + { + MARIA_HA *tmpinfo=(MARIA_HA*) list_element->data; + if (tmpinfo->s == info->s) + { + if (tmpinfo->dfile >= 0 && my_close(tmpinfo->dfile,MYF(0))) + error = my_errno; + tmpinfo->dfile= -1; + } + } + } + share->kfile= -1; /* Files aren't open anymore */ + pthread_mutex_unlock(&share->intern_lock); +#endif + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_FLUSH: + if (!share->temporary) + flush_key_blocks(share->key_cache, share->kfile, FLUSH_KEEP); +#ifdef HAVE_PWRITE + _ma_decrement_open_count(info); +#endif + if (share->not_flushed) + { + share->not_flushed=0; + if (my_sync(share->kfile, MYF(0))) + error= my_errno; + if (my_sync(info->dfile, MYF(0))) + error= my_errno; + if (error) + { + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + if (share->base.blobs) + _ma_alloc_rec_buff(info, -1, &info->rec_buff); + break; + case HA_EXTRA_NORMAL: /* Theese isn't in use */ + info->quick_mode=0; + break; + case HA_EXTRA_QUICK: + info->quick_mode=1; + break; + case HA_EXTRA_NO_ROWS: + if (!share->state.header.uniques) + info->opt_flag|= OPT_NO_ROWS; + break; + case HA_EXTRA_PRELOAD_BUFFER_SIZE: + info->preload_buff_size= *((ulong *) extra_arg); + break; + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + case HA_EXTRA_CHANGE_KEY_TO_DUP: + maria_extra_keyflag(info, function); + break; + case HA_EXTRA_MMAP: +#ifdef HAVE_MMAP + pthread_mutex_lock(&share->intern_lock); + if (!share->file_map) + { + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + { + DBUG_PRINT("warning",("mmap failed: errno: %d",errno)); + error= my_errno= errno; + } + else + { + share->file_read= _ma_mmap_pread; + share->file_write= _ma_mmap_pwrite; + } + } + pthread_mutex_unlock(&share->intern_lock); +#endif + break; + case HA_EXTRA_KEY_CACHE: + case HA_EXTRA_NO_KEY_CACHE: + default: + break; + } + { + char tmp[1]; + tmp[0]=function; + maria_log_command(MARIA_LOG_EXTRA,info,(byte*) tmp,1,error); + } + DBUG_RETURN(error); +} /* maria_extra */ + + +/* + Start/Stop Inserting Duplicates Into a Table, WL#1648. + */ +static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function) +{ + uint idx; + + for (idx= 0; idx< info->s->base.keys; idx++) + { + switch (function) { + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + info->s->keyinfo[idx].flag|= HA_NOSAME; + break; + case HA_EXTRA_CHANGE_KEY_TO_DUP: + info->s->keyinfo[idx].flag&= ~(HA_NOSAME); + break; + default: + break; + } + } +} diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c new file mode 100644 index 00000000000..2b8e0d8b97a --- /dev/null +++ b/storage/maria/ma_ft_boolean_search.c @@ -0,0 +1,955 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* TODO: add caching - pre-read several index entries at once */ + +/* + Added optimization for full-text queries with plus-words. It was + implemented by sharing maximal document id (max_docid) variable + inside plus subtree. max_docid could be used by any word in plus + subtree, but it could be updated by plus-word only. + + The idea is: there is no need to search for docid smaller than + biggest docid inside current plus subtree. + + Examples: + +word1 word2 + share same max_docid + max_docid updated by word1 + +word1 +(word2 word3) + share same max_docid + max_docid updated by word1 + +(word1 -word2) +(+word3 word4) + share same max_docid + max_docid updated by word3 +*/ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with boolean queries */ + +static double _wghts[11]= +{ + 0.131687242798354, + 0.197530864197531, + 0.296296296296296, + 0.444444444444444, + 0.666666666666667, + 1.000000000000000, + 1.500000000000000, + 2.250000000000000, + 3.375000000000000, + 5.062500000000000, + 7.593750000000000}; +static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */ + +static double _nwghts[11]= +{ + -0.065843621399177, + -0.098765432098766, + -0.148148148148148, + -0.222222222222222, + -0.333333333333334, + -0.500000000000000, + -0.750000000000000, + -1.125000000000000, + -1.687500000000000, + -2.531250000000000, + -3.796875000000000}; +static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ + +#define FTB_FLAG_TRUNC 1 +/* At most one of the following flags can be set */ +#define FTB_FLAG_YES 2 +#define FTB_FLAG_NO 4 +#define FTB_FLAG_WONLY 8 + +typedef struct st_ftb_expr FTB_EXPR; +struct st_ftb_expr +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; + my_off_t max_docid; + float weight; + float cur_weight; + LIST *phrase; /* phrase words */ + LIST *document; /* for phrase search */ + uint yesses; /* number of "yes" words matched */ + uint nos; /* number of "no" words matched */ + uint ythresh; /* number of "yes" words in expr */ + uint yweaks; /* number of "yes" words for scan only */ +}; + +typedef struct st_ftb_word +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; /* for index search and for scan */ + my_off_t key_root; + my_off_t *max_docid; + MARIA_KEYDEF *keyinfo; + struct st_ftb_word *prev; + float weight; + uint ndepth; + uint len; + uchar off; + byte word[1]; +} FTB_WORD; + +typedef struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + CHARSET_INFO *charset; + FTB_EXPR *root; + FTB_WORD **list; + FTB_WORD *last_word; + MEM_ROOT mem_root; + QUEUE queue; + TREE no_dupes; + my_off_t lastpos; + uint keynr; + uchar with_scan; + enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state; +} FTB; + +static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) +{ + int i; + + /* if a==curdoc, take it as a < b */ + if (v && a->docid[0] == *v) + return -1; + + /* ORDER BY docid, ndepth DESC */ + i=CMP_NUM(a->docid[0], b->docid[0]); + if (!i) + i=CMP_NUM(b->ndepth,a->ndepth); + return i; +} + +static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) +{ + /* ORDER BY word DESC, ndepth DESC */ + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + (uchar*) (*a)->word+1,(*a)->len-1,0,0); + if (!i) + i=CMP_NUM((*b)->ndepth,(*a)->ndepth); + return i; +} + + +typedef struct st_my_ftb_param +{ + FTB *ftb; + FTB_EXPR *ftbe; + byte *up_quot; + uint depth; +} MY_FTB_PARAM; + + +static int ftb_query_add_word(void *param, char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *info) +{ + MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param; + FTB_WORD *ftbw; + FTB_EXPR *ftbe, *tmp_expr; + FT_WORD *phrase_word; + LIST *tmp_element; + int r= info->weight_adjust; + float weight= (float) + (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; + + switch (info->type) { + case FT_TOKEN_WORD: + ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, + sizeof(FTB_WORD) + + (info->trunc ? HA_MAX_KEY_BUFF : + word_len * ftb_param->ftb->charset->mbmaxlen + + HA_FT_WLEN + + ftb_param->ftb->info->s->rec_reflength)); + ftbw->len= word_len + 1; + ftbw->flags= 0; + ftbw->off= 0; + if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO; + if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC; + ftbw->weight= weight; + ftbw->up= ftb_param->ftbe; + ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR; + ftbw->ndepth= (info->yesno < 0) + ftb_param->depth; + ftbw->key_root= HA_OFFSET_ERROR; + memcpy(ftbw->word + 1, word, word_len); + ftbw->word[0]= word_len; + if (info->yesno > 0) ftbw->up->ythresh++; + ftb_param->ftb->queue.max_elements++; + ftbw->prev= ftb_param->ftb->last_word; + ftb_param->ftb->last_word= ftbw; + ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC); + for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up) + if (! (tmp_expr->flags & FTB_FLAG_YES)) + break; + ftbw->max_docid= &tmp_expr->max_docid; + /* fall through */ + case FT_TOKEN_STOPWORD: + if (! ftb_param->up_quot) break; + phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + phrase_word->pos= word; + phrase_word->len= word_len; + tmp_element->data= (void *)phrase_word; + ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element); + /* Allocate document list at this point. + It allows to avoid huge amount of allocs/frees for each row.*/ + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + ftb_param->ftbe->document= + list_add(ftb_param->ftbe->document, tmp_element); + break; + case FT_TOKEN_LEFT_PAREN: + ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->flags= 0; + if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO; + ftbe->weight= weight; + ftbe->up= ftb_param->ftbe; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + if (info->quot) ftb_param->ftb->with_scan|= 2; + if (info->yesno > 0) ftbe->up->ythresh++; + ftb_param->ftbe= ftbe; + ftb_param->depth++; + ftb_param->up_quot= info->quot; + break; + case FT_TOKEN_RIGHT_PAREN: + if (ftb_param->ftbe->document) + { + /* Circuit document list */ + for (tmp_element= ftb_param->ftbe->document; + tmp_element->next; tmp_element= tmp_element->next) /* no-op */; + tmp_element->next= ftb_param->ftbe->document; + ftb_param->ftbe->document->prev= tmp_element; + } + info->quot= 0; + if (ftb_param->ftbe->up) + { + DBUG_ASSERT(ftb_param->depth); + ftb_param->ftbe= ftb_param->ftbe->up; + ftb_param->depth--; + ftb_param->up_quot= 0; + } + break; + case FT_TOKEN_EOF: + default: + break; + } + return(0); +} + + +static int ftb_parse_query_internal(void *param, char *query, int len) +{ + MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param; + MYSQL_FTPARSER_BOOLEAN_INFO info; + CHARSET_INFO *cs= ftb_param->ftb->charset; + char **start= &query; + char *end= query + len; + FT_WORD w; + + info.prev= ' '; + info.quot= 0; + while (maria_ft_get_word(cs, start, end, &w, &info)) + ftb_query_add_word(param, w.pos, w.len, &info); + return(0); +} + + +static void _ftb_parse_query(FTB *ftb, byte *query, uint len, + struct st_mysql_ftparser *parser) +{ + MYSQL_FTPARSER_PARAM *param; + MY_FTB_PARAM ftb_param; + DBUG_ENTER("_ftb_parse_query"); + DBUG_ASSERT(parser); + + if (ftb->state != UNINITIALIZED) + DBUG_VOID_RETURN; + + ftb_param.ftb= ftb; + ftb_param.depth= 0; + ftb_param.ftbe= ftb->root; + ftb_param.up_quot= 0; + + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) + DBUG_VOID_RETURN; + param->mysql_parse= ftb_parse_query_internal; + param->mysql_add_word= ftb_query_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= query; + param->length= len; + param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO; + parser->parse(param); + DBUG_VOID_RETURN; +} + + +static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)), + const void *a,const void *b) +{ + return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); +} + +/* returns 1 if the search was finished (must-word wasn't found) */ +static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) +{ + int r; + int subkeys=1; + my_bool can_go_down; + MARIA_HA *info=ftb->info; + uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength; + byte *lastkey_buf=ftbw->word+ftbw->off; + LINT_INIT(off); + + if (ftbw->flags & FTB_FLAG_TRUNC) + lastkey_buf+=ftbw->len; + + if (init_search) + { + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + + r= _ma_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root); + } + else + { + uint sflag= SEARCH_BIGGER; + if (ftbw->docid[0] < *ftbw->max_docid) + { + sflag|= SEARCH_SAME; + _ma_dpointer(info, (uchar *)(ftbw->word + ftbw->len + HA_FT_WLEN), + *ftbw->max_docid); + } + r= _ma_search(info, ftbw->keyinfo, (uchar*) lastkey_buf, + USE_WHOLE_KEY, sflag, ftbw->key_root); + } + + can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC))); + /* Skip rows inserted by concurrent insert */ + while (!r) + { + if (can_go_down) + { + /* going down ? */ + off=info->lastkey_length-extra; + subkeys=ft_sintXkorr(info->lastkey+off); + } + if (subkeys<0 || info->lastpos < info->state->data_file_length) + break; + r= _ma_search_next(info, ftbw->keyinfo, info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, ftbw->key_root); + } + + if (!r && !ftbw->off) + { + r= ha_compare_text(ftb->charset, + info->lastkey+1, + info->lastkey_length-extra-1, + (uchar*) ftbw->word+1, + ftbw->len-1, + (my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0); + } + + if (r) /* not found */ + { + if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC)) + { + ftbw->docid[0]=HA_OFFSET_ERROR; + if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0) + { + /* + This word MUST BE present in every document returned, + so we can stop the search right now + */ + ftb->state=INDEX_DONE; + return 1; /* search is done */ + } + else + return 0; + } + + /* going up to the first-level tree to continue search there */ + _ma_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root); + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + ftbw->off=0; + return _ft2_search(ftb, ftbw, 0); + } + + /* matching key found */ + memcpy(lastkey_buf, info->lastkey, info->lastkey_length); + if (lastkey_buf == ftbw->word) + ftbw->len=info->lastkey_length-extra; + + /* going down ? */ + if (subkeys<0) + { + /* + yep, going down, to the second-level tree + TODO here: subkey-based optimization + */ + ftbw->off=off; + ftbw->key_root=info->lastpos; + ftbw->keyinfo=& info->s->ft2_keyinfo; + r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root); + DBUG_ASSERT(r==0); /* found something */ + memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length); + } + ftbw->docid[0]=info->lastpos; + if (ftbw->flags & FTB_FLAG_YES) + *ftbw->max_docid= info->lastpos; + return 0; +} + +static void _ftb_init_index_search(FT_INFO *ftb) +{ + int i; + FTB_WORD *ftbw; + + if ((ftb->state != READY && ftb->state !=INDEX_DONE) || + ftb->keynr == NO_SUCH_KEY) + return; + ftb->state=INDEX_SEARCH; + + for (i=ftb->queue.elements; i; i--) + { + ftbw=(FTB_WORD *)(ftb->queue.root[i]); + + if (ftbw->flags & FTB_FLAG_TRUNC) + { + /* + special treatment for truncation operator + 1. there are some (besides this) +words + | no need to search in the index, it can never ADD new rows + | to the result, and to remove half-matched rows we do scan anyway + 2. -trunc* + | same as 1. + 3. in 1 and 2, +/- need not be on the same expr. level, + but can be on any upper level, as in +word +(trunc1* trunc2*) + 4. otherwise + | We have to index-search for this prefix. + | It may cause duplicates, as in the index (sorted by ) + | + | + | + | Searching for "aa*" will find row1 twice... + */ + FTB_EXPR *ftbe; + for (ftbe=(FTB_EXPR*)ftbw; + ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC); + ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up) + { + if (ftbe->flags & FTB_FLAG_NO || /* 2 */ + ftbe->up->ythresh - ftbe->up->yweaks >1) /* 1 */ + { + FTB_EXPR *top_ftbe=ftbe->up; + ftbw->docid[0]=HA_OFFSET_ERROR; + for (ftbe=(FTB_EXPR *)ftbw; + ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO); + ftbe=ftbe->up) + ftbe->up->yweaks++; + ftbe=0; + break; + } + } + if (!ftbe) + continue; + /* 4 */ + if (!is_tree_inited(& ftb->no_dupes)) + init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t), + _ftb_no_dupes_cmp,0,0,0); + else + reset_tree(& ftb->no_dupes); + } + + ftbw->off=0; /* in case of reinit */ + if (_ft2_search(ftb, ftbw, 1)) + return; + } + queue_fix(& ftb->queue); +} + + +FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, CHARSET_INFO *cs) +{ + FTB *ftb; + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + + if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) + return 0; + ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean; + ftb->state=UNINITIALIZED; + ftb->info=info; + ftb->keynr=keynr; + ftb->charset=cs; + DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset); + ftb->with_scan=0; + ftb->lastpos=HA_OFFSET_ERROR; + bzero(& ftb->no_dupes, sizeof(TREE)); + ftb->last_word= 0; + + init_alloc_root(&ftb->mem_root, 1024, 1024); + ftb->queue.max_elements= 0; + if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)))) + goto err; + ftbe->weight=1; + ftbe->flags=FTB_FLAG_YES; + ftbe->nos=1; + ftbe->up=0; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + ftb->root=ftbe; + _ftb_parse_query(ftb, query, query_len, keynr == NO_SUCH_KEY ? + &ft_default_parser : + info->s->keyinfo[keynr].parser); + /* + Hack: instead of init_queue, we'll use reinit queue to be able + to alloc queue with alloc_root() + */ + if (! (ftb->queue.root= (byte **)alloc_root(&ftb->mem_root, + (ftb->queue.max_elements + 1) * + sizeof(void *)))) + goto err; + reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0, + (int (*)(void*, byte*, byte*))FTB_WORD_cmp, 0); + for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev) + queue_insert(&ftb->queue, (byte *)ftbw); + ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, + sizeof(FTB_WORD *)*ftb->queue.elements); + memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); + qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), + (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); + if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; + ftb->state=READY; + return ftb; +err: + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); + return 0; +} + + +typedef struct st_my_ftb_phrase_param +{ + LIST *phrase; + LIST *document; + CHARSET_INFO *cs; + uint phrase_length; + uint document_length; + uint match; +} MY_FTB_PHRASE_PARAM; + + +static int ftb_phrase_add_word(void *param, char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_PHRASE_PARAM *phrase_param= (MY_FTB_PHRASE_PARAM *)param; + FT_WORD *w= (FT_WORD *)phrase_param->document->data; + LIST *phrase, *document; + w->pos= word; + w->len= word_len; + phrase_param->document= phrase_param->document->prev; + if (phrase_param->phrase_length > phrase_param->document_length) + { + phrase_param->document_length++; + return 0; + } + /* TODO: rewrite phrase search to avoid + comparing the same word twice. */ + for (phrase= phrase_param->phrase, document= phrase_param->document->next; + phrase; phrase= phrase->next, document= document->next) + { + FT_WORD *phrase_word= (FT_WORD *)phrase->data; + FT_WORD *document_word= (FT_WORD *)document->data; + if (my_strnncoll(phrase_param->cs, + (uchar*) phrase_word->pos, phrase_word->len, + (uchar*) document_word->pos, document_word->len)) + return 0; + } + phrase_param->match++; + return 0; +} + + +static int ftb_check_phrase_internal(void *param, char *document, int len) +{ + FT_WORD word; + MY_FTB_PHRASE_PARAM *phrase_param= (MY_FTB_PHRASE_PARAM *)param; + const char *docend= document + len; + while (maria_ft_simple_get_word(phrase_param->cs, &document, docend, &word, FALSE)) + { + ftb_phrase_add_word(param, word.pos, word.len, 0); + if (phrase_param->match) + return 1; + } + return 0; +} + + +/* + Checks if given buffer matches phrase list. + + SYNOPSIS + _ftb_check_phrase() + s0 start of buffer + e0 end of buffer + phrase broken into list phrase + cs charset info + + RETURN VALUE + 1 is returned if phrase found, 0 else. +*/ + +static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len, + FTB_EXPR *ftbe, struct st_mysql_ftparser *parser) +{ + MY_FTB_PHRASE_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ftb_check_phrase"); + DBUG_ASSERT(parser); + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) + DBUG_RETURN(0); + ftb_param.phrase= ftbe->phrase; + ftb_param.document= ftbe->document; + ftb_param.cs= ftb->charset; + ftb_param.phrase_length= list_length(ftbe->phrase); + ftb_param.document_length= 1; + ftb_param.match= 0; + + param->mysql_parse= ftb_check_phrase_internal; + param->mysql_add_word= ftb_phrase_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= (byte *)document; + param->length= len; + param->mode= MYSQL_FTPARSER_WITH_STOPWORDS; + parser->parse(param); + DBUG_RETURN(ftb_param.match ? 1 : 0); +} + + +static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) +{ + FT_SEG_ITERATOR ftsi; + FTB_EXPR *ftbe; + float weight=ftbw->weight; + int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); + my_off_t curdoc=ftbw->docid[mode]; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) + { + ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks); + if (ftbe->docid[mode] != curdoc) + { + ftbe->cur_weight=0; + ftbe->yesses=ftbe->nos=0; + ftbe->docid[mode]=curdoc; + } + if (ftbe->nos) + break; + if (yn & FTB_FLAG_YES) + { + weight /= ftbe->ythresh; + ftbe->cur_weight += weight; + if ((int) ++ftbe->yesses == ythresh) + { + yn=ftbe->flags; + weight=ftbe->cur_weight*ftbe->weight; + if (mode && ftbe->phrase) + { + int not_found=1; + + memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); + while (_ma_ft_segiterator(&ftsi) && not_found) + { + if (!ftsi.pos) + continue; + not_found = ! _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, + ftbe, parser); + } + if (not_found) break; + } /* ftbe->quot */ + } + else + break; + } + else + if (yn & FTB_FLAG_NO) + { + /* + NOTE: special sort function of queue assures that all + (yn & FTB_FLAG_NO) != 0 + events for every particular subexpression will + "auto-magically" happen BEFORE all the + (yn & FTB_FLAG_YES) != 0 events. So no + already matched expression can become not-matched again. + */ + ++ftbe->nos; + break; + } + else + { + if (ftbe->ythresh) + weight/=3; + ftbe->cur_weight += weight; + if ((int) ftbe->yesses < ythresh) + break; + if (!(yn & FTB_FLAG_WONLY)) + yn= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ; + weight*= ftbe->weight; + } + } +} + + +int maria_ft_boolean_read_next(FT_INFO *ftb, char *record) +{ + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + MARIA_HA *info=ftb->info; + my_off_t curdoc; + + if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE) + return -1; + + /* black magic ON */ + if ((int) _ma_check_index(info, ftb->keynr) < 0) + return my_errno; + if (_ma_readinfo(info, F_RDLCK, 1)) + return my_errno; + /* black magic OFF */ + + if (!ftb->queue.elements) + return my_errno=HA_ERR_END_OF_FILE; + + /* Attention!!! Address of a local variable is used here! See err: label */ + ftb->queue.first_cmp_arg=(void *)&curdoc; + + while (ftb->state == INDEX_SEARCH && + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != + HA_OFFSET_ERROR) + { + while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) + { + _ftb_climb_the_tree(ftb, ftbw, 0); + + /* update queue */ + _ft2_search(ftb, ftbw, 0); + queue_replaced(& ftb->queue); + } + + ftbe=ftb->root; + if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 && + ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos) + { + /* curdoc matched ! */ + if (is_tree_inited(&ftb->no_dupes) && + tree_insert(&ftb->no_dupes, &curdoc, 0, + ftb->no_dupes.custom_arg)->count >1) + /* but it managed already to get past this line once */ + continue; + + info->lastpos=curdoc; + /* Clear all states, except that the table was updated */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (!(*info->read_record)(info,curdoc,record)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0) + continue; /* no match */ + my_errno=0; + goto err; + } + goto err; + } + } + ftb->state=INDEX_DONE; + my_errno=HA_ERR_END_OF_FILE; +err: + ftb->queue.first_cmp_arg=(void *)0; + return my_errno; +} + + +typedef struct st_my_ftb_find_param +{ + FT_INFO *ftb; + FT_SEG_ITERATOR *ftsi; +} MY_FTB_FIND_PARAM; + + +static int ftb_find_relevance_add_word(void *param, char *word, int len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_FIND_PARAM *ftb_param= (MY_FTB_FIND_PARAM *)param; + FT_INFO *ftb= ftb_param->ftb; + FTB_WORD *ftbw; + int a, b, c; + for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word+1, ftbw->len-1, + (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) + b= c; + else + a= c; + } + for (; c >= 0; c--) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word + 1,ftbw->len - 1, + (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) + break; + if (ftbw->docid[1] == ftb->info->lastpos) + continue; + ftbw->docid[1]= ftb->info->lastpos; + _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi); + } + return(0); +} + + +static int ftb_find_relevance_parse(void *param, char *doc, int len) +{ + FT_INFO *ftb= ((MY_FTB_FIND_PARAM *)param)->ftb; + char *end= doc + len; + FT_WORD w; + while (maria_ft_simple_get_word(ftb->charset, &doc, end, &w, TRUE)) + ftb_find_relevance_add_word(param, w.pos, w.len, 0); + return(0); +} + + +float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) +{ + FTB_EXPR *ftbe; + FT_SEG_ITERATOR ftsi, ftsi2; + my_off_t docid=ftb->info->lastpos; + MY_FTB_FIND_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + if (docid == HA_OFFSET_ERROR) + return -2.0; + if (!ftb->queue.elements) + return 0; + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) + return 0; + + if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos) + { + FTB_EXPR *x; + uint i; + + for (i=0; i < ftb->queue.elements; i++) + { + ftb->list[i]->docid[1]=HA_OFFSET_ERROR; + for (x=ftb->list[i]->up; x; x=x->up) + x->docid[1]=HA_OFFSET_ERROR; + } + } + + ftb->lastpos=docid; + + if (ftb->keynr==NO_SUCH_KEY) + _ma_ft_segiterator_dummy_init(record, length, &ftsi); + else + _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); + memcpy(&ftsi2, &ftsi, sizeof(ftsi)); + + ftb_param.ftb= ftb; + ftb_param.ftsi= &ftsi2; + while (_ma_ft_segiterator(&ftsi)) + { + if (!ftsi.pos) + continue; + /* Since subsequent call to _ftb_check_phrase overwrites param elements, + it must be reinitialized at each iteration _inside_ the loop. */ + param->mysql_parse= ftb_find_relevance_parse; + param->mysql_add_word= ftb_find_relevance_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + param->doc= (byte *)ftsi.pos; + param->length= ftsi.len; + parser->parse(param); + } + ftbe=ftb->root; + if (ftbe->docid[1]==docid && ftbe->cur_weight>0 && + ftbe->yesses>=ftbe->ythresh && !ftbe->nos) + { /* row matched ! */ + return ftbe->cur_weight; + } + else + { /* match failed ! */ + return 0.0; + } +} + + +void maria_ft_boolean_close_search(FT_INFO *ftb) +{ + if (is_tree_inited(& ftb->no_dupes)) + { + delete_tree(& ftb->no_dupes); + } + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); +} + + +float maria_ft_boolean_get_relevance(FT_INFO *ftb) +{ + return ftb->root->cur_weight; +} + + +void maria_ft_boolean_reinit_search(FT_INFO *ftb) +{ + _ftb_init_index_search(ftb); +} diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c new file mode 100644 index 00000000000..b9b496fc268 --- /dev/null +++ b/storage/maria/ma_ft_eval.c @@ -0,0 +1,254 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_eval.h" +#include +#include + +static void print_error(int exit_code, const char *fmt,...); +static void get_options(int argc, char *argv[]); +static int create_record(char *pos, FILE *file); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 's', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'q', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MARIA_HA *file; + int i,j; + + MY_INIT(argv[0]); + get_options(argc,argv); + bzero((char*)recinfo,sizeof(recinfo)); + + maria_init(); + /* First define 2 columns */ + recinfo[0].type=FIELD_SKIP_ENDSPACE; + recinfo[0].length=docid_length; + recinfo[1].type=FIELD_BLOB; + recinfo[1].length= 4+maria_portable_sizeof_char_ptr; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag= HA_BLOB_PART; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].bit_start=4; + keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].flag = HA_FULLTEXT; + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,1,keyinfo,2,recinfo,0,NULL,(MARIA_CREATE_INFO*) 0,0)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + if (!silent) + printf("Initializing stopwords\n"); + maria_ft_init_stopwords(stopwordlist); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + i=0; + while (create_record(record,df)) + { + error=maria_write(file,record); + if (error) + printf("I= %2d maria_write: %d errno: %d\n",i,error,my_errno); + i++; + } + fclose(df); + + if (maria_close(file)) goto err; + if (!silent) + printf("- Reopening file\n"); + if (!(file=maria_open(filename,2,0))) goto err; + if (!silent) + printf("- Reading rows with key\n"); + for (i=1;create_record(record,qf);i++) + { + FT_DOCLIST *result; + double w; + int t, err; + + result=maria_ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1); + if (!result) + { + printf("Query %d failed with errno %3d\n",i,my_errno); + goto err; + } + if (!silent) + printf("Query %d. Found: %d.\n",i,result->ndocs); + for (j=0;(err=maria_ft_nlq_read_next(result, read_record))==0;j++) + { + t=uint2korr(read_record); + w=maria_ft_nlq_get_relevance(result); + printf("%d %.*s %f\n",i,t,read_record+2,w); + } + if (err != HA_ERR_END_OF_FILE) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + goto err; + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); + + err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ + +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { + case 's': + if (stopwordlist && stopwordlist != maria_ft_precompiled_stopwords) + break; + { + FILE *f; char s[HA_FT_MAXLEN]; int i=0,n=SWL_INIT; + + if (!(stopwordlist=(const char**) malloc(n*sizeof(char *)))) + print_error(1,"malloc(%d)",n*sizeof(char *)); + if (!(f=fopen(argument,"r"))) + print_error(1,"fopen(%s)",argument); + while (!feof(f)) + { + if (!(fgets(s,HA_FT_MAXLEN,f))) + print_error(1,"fgets(s,%d,%s)",HA_FT_MAXLEN,argument); + if (!(stopwordlist[i++]=strdup(s))) + print_error(1,"strdup(%s)",s); + if (i >= n) + { + n+=SWL_PLUS; + if (!(stopwordlist=(const char**) realloc((char*) stopwordlist, + n*sizeof(char *)))) + print_error(1,"realloc(%d)",n*sizeof(char *)); + } + } + fclose(f); + stopwordlist[i]=NULL; + break; + } + case 'q': silent=1; break; + case 'S': if (stopwordlist==maria_ft_precompiled_stopwords) stopwordlist=NULL; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!(d_file=argv[optind])) print_error(1,"No d_file"); + if (!(df=fopen(d_file,"r"))) + print_error(1,"fopen(%s)",d_file); + if (!(q_file=argv[optind+1])) print_error(1,"No q_file"); + if (!(qf=fopen(q_file,"r"))) + print_error(1,"fopen(%s)",q_file); + return; +} /* get options */ + + +static int create_record(char *pos, FILE *file) +{ + uint tmp; char *ptr; + + bzero((char *)pos,MAX_REC_LENGTH); + + /* column 1 - VARCHAR */ + if (!(fgets(pos+2,MAX_REC_LENGTH-32,file))) + { + if (feof(file)) + return 0; + else + print_error(1,"fgets(docid) - 1"); + } + tmp=(uint) strlen(pos+2)-1; + int2store(pos,tmp); + pos+=recinfo[0].length; + + /* column 2 - BLOB */ + + if (!(fgets(blob_record,MAX_BLOB_LENGTH,file))) + print_error(1,"fgets(docid) - 2"); + tmp=(uint) strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + return 1; +} + +/* VARARGS */ + +static void print_error(int exit_code, const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + exit(exit_code); +} + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_eval.h b/storage/maria/ma_ft_eval.h new file mode 100644 index 00000000000..d9b5c51642c --- /dev/null +++ b/storage/maria/ma_ft_eval.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2006 MySQL AB & Sergei A. Golubchik + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +const char **stopwordlist=maria_ft_precompiled_stopwords; + +#define MAX_REC_LENGTH 128 +#define MAX_BLOB_LENGTH 60000 +char record[MAX_REC_LENGTH], read_record[MAX_REC_LENGTH+MAX_BLOB_LENGTH]; +char blob_record[MAX_BLOB_LENGTH+20*20]; + +char *filename= (char*) "EVAL"; + +int silent=0, error=0; + +uint key_length=MAX_BLOB_LENGTH,docid_length=32; +char *d_file, *q_file; +FILE *df,*qf; + +MARIA_COLUMNDEF recinfo[3]; +MARIA_KEYDEF keyinfo[2]; +HA_KEYSEG keyseg[10]; + +#define SWL_INIT 500 +#define SWL_PLUS 50 + +#define MAX_LINE_LENGTH 128 +char line[MAX_LINE_LENGTH]; diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c new file mode 100644 index 00000000000..a9741787fc9 --- /dev/null +++ b/storage/maria/ma_ft_nlq_search.c @@ -0,0 +1,366 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with natural language queries */ + +typedef struct ft_doc_rec +{ + my_off_t dpos; + double weight; +} FT_DOC; + +struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + int ndocs; + int curdoc; + FT_DOC doc[1]; +}; + +typedef struct st_all_in_one +{ + MARIA_HA *info; + uint keynr; + CHARSET_INFO *charset; + uchar *keybuff; + TREE dtree; +} ALL_IN_ONE; + +typedef struct st_ft_superdoc +{ + FT_DOC doc; + FT_WORD *word_ptr; + double tmp_weight; +} FT_SUPERDOC; + +static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), + FT_SUPERDOC *p1, FT_SUPERDOC *p2) +{ + if (p1->doc.dpos < p2->doc.dpos) + return -1; + if (p1->doc.dpos == p2->doc.dpos) + return 0; + return 1; +} + +static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) +{ + int subkeys, r; + uint keylen, doc_cnt; + FT_SUPERDOC sdoc, *sptr; + TREE_ELEMENT *selem; + double gweight=1; + MARIA_HA *info=aio->info; + uchar *keybuff=aio->keybuff; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr; + my_off_t key_root=info->s->state.key_root[aio->keynr]; + uint extra=HA_FT_WLEN+info->s->base.rec_reflength; +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + float tmp_weight; +#else +#error +#endif + + DBUG_ENTER("walk_and_match"); + + word->weight=LWS_FOR_QUERY; + + keylen= _ma_ft_make_key(info,aio->keynr,(char*) keybuff,word,0); + keylen-=HA_FT_WLEN; + doc_cnt=0; + + /* Skip rows inserted by current inserted */ + for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ; + !r && + (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + info->lastpos >= info->state->data_file_length ; + r= _ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, SEARCH_BIGGER, key_root)) + ; + + info->update|= HA_STATE_AKTIV; /* for _ma_test_if_changed() */ + + /* The following should be safe, even if we compare doubles */ + while (!r && gweight) + { + + if (keylen && + ha_compare_text(aio->charset,info->lastkey+1, + info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) + break; + + if (subkeys<0) + { + if (doc_cnt) + DBUG_RETURN(1); /* index is corrupted */ + /* + TODO here: unsafe optimization, should this word + be skipped (based on subkeys) ? + */ + keybuff+=keylen; + keyinfo=& info->s->ft2_keyinfo; + key_root=info->lastpos; + keylen=0; + r= _ma_search_first(info, keyinfo, key_root); + goto do_skip; + } +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + tmp_weight=*(float*)&subkeys; +#else +#error +#endif + /* The following should be safe, even if we compare doubles */ + if (tmp_weight==0) + DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ + + sdoc.doc.dpos=info->lastpos; + + /* saving document matched into dtree */ + if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg))) + DBUG_RETURN(1); + + sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); + + if (selem->count==1) /* document's first match */ + sptr->doc.weight=0; + else + sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight; + + sptr->word_ptr=word; + sptr->tmp_weight=tmp_weight; + + doc_cnt++; + + gweight=word->weight*GWS_IN_USE; + if (gweight < 0 || doc_cnt > 2000000) + gweight=0; + + if (_ma_test_if_changed(info) == 0) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + else + r= _ma_search(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); +do_skip: + while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + !r && info->lastpos >= info->state->data_file_length) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + + } + word->weight=gweight; + + DBUG_RETURN(0); +} + + +static int walk_and_copy(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), FT_DOC **to) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + (*to)->dpos=from->doc.dpos; + (*to)->weight=from->doc.weight; + (*to)++; + DBUG_RETURN(0); +} + +static int walk_and_push(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), QUEUE *best) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + set_if_smaller(best->elements, ft_query_expansion_limit-1); + queue_insert(best, (byte *)& from->doc); + DBUG_RETURN(0); +} + + +static int FT_DOC_cmp(void *unused __attribute__((unused)), + FT_DOC *a, FT_DOC *b) +{ + return sgn(b->weight - a->weight); +} + + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, uint flags, byte *record) +{ + TREE wtree; + ALL_IN_ONE aio; + FT_DOC *dptr; + FT_INFO *dlist=NULL; + my_off_t saved_lastpos=info->lastpos; + struct st_mysql_ftparser *parser; + MYSQL_FTPARSER_PARAM *ftparser_param; + DBUG_ENTER("maria_ft_init_nlq_search"); + +/* black magic ON */ + if ((int) (keynr = _ma_check_index(info,keynr)) < 0) + DBUG_RETURN(NULL); + if (_ma_readinfo(info,F_RDLCK,1)) + DBUG_RETURN(NULL); +/* black magic OFF */ + + aio.info=info; + aio.keynr=keynr; + aio.charset=info->s->keyinfo[keynr].seg->charset; + aio.keybuff=info->lastkey+info->s->base.max_key_length; + parser= info->s->keyinfo[keynr].parser; + if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr))) + goto err; + + bzero(&wtree,sizeof(wtree)); + + init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, + NULL, NULL); + + maria_ft_parse_init(&wtree, aio.charset); + if (maria_ft_parse(&wtree, query, query_len, 0, parser, ftparser_param)) + goto err; + + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + if (flags & FT_EXPAND && ft_query_expansion_limit) + { + QUEUE best; + init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp, + 0); + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push, + &best, left_root_right); + while (best.elements) + { + my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos; + if (!(*info->read_record)(info,docid,record)) + { + info->update|= HA_STATE_AKTIV; + _ma_ft_parse(&wtree, info, keynr, record, 1, ftparser_param); + } + } + delete_queue(&best); + reset_tree(&aio.dtree); + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + } + + /* + If ndocs == 0, this will not allocate RAM for FT_INFO.doc[], + so if ndocs == 0, FT_INFO.doc[] must not be accessed. + */ + dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ + sizeof(FT_DOC)* + (int)(aio.dtree.elements_in_tree-1), + MYF(0)); + if (!dlist) + goto err; + + dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq; + dlist->ndocs=aio.dtree.elements_in_tree; + dlist->curdoc=-1; + dlist->info=aio.info; + dptr=dlist->doc; + + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy, + &dptr, left_root_right); + + if (flags & FT_SORTED) + qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0); + +err: + delete_tree(&aio.dtree); + delete_tree(&wtree); + info->lastpos=saved_lastpos; + DBUG_RETURN(dlist); +} + + +int maria_ft_nlq_read_next(FT_INFO *handler, char *record) +{ + MARIA_HA *info= (MARIA_HA *) handler->info; + + if (++handler->curdoc >= handler->ndocs) + { + --handler->curdoc; + return HA_ERR_END_OF_FILE; + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + info->lastpos=handler->doc[handler->curdoc].dpos; + if (!(*info->read_record)(info,info->lastpos,record)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; + } + return my_errno; +} + + +float maria_ft_nlq_find_relevance(FT_INFO *handler, + byte *record __attribute__((unused)), + uint length __attribute__((unused))) +{ + int a,b,c; + FT_DOC *docs=handler->doc; + my_off_t docid=handler->info->lastpos; + + if (docid == HA_POS_ERROR) + return -5.0; + + /* Assuming docs[] is sorted by dpos... */ + + for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) + { + if (docs[c].dpos > docid) + b=c; + else + a=c; + } + /* bounds check to avoid accessing unallocated handler->doc */ + if (a < handler->ndocs && docs[a].dpos == docid) + return (float) docs[a].weight; + else + return 0.0; +} + + +void maria_ft_nlq_close_search(FT_INFO *handler) +{ + my_free((gptr)handler,MYF(0)); +} + + +float maria_ft_nlq_get_relevance(FT_INFO *handler) +{ + return (float) handler->doc[handler->curdoc].weight; +} + + +void maria_ft_nlq_reinit_search(FT_INFO *handler) +{ + handler->curdoc=-1; +} + diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c new file mode 100644 index 00000000000..983bebf3562 --- /dev/null +++ b/storage/maria/ma_ft_parser.c @@ -0,0 +1,394 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#include "ma_ftdefs.h" + +typedef struct st_maria_ft_docstat { + FT_WORD *list; + uint uniq; + double sum; +} FT_DOCSTAT; + + +typedef struct st_my_maria_ft_parser_param +{ + TREE *wtree; + my_bool with_alloc; +} MY_FT_PARSER_PARAM; + + +static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) +{ + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, + (uchar*) w2->pos, w2->len, 0, 0); +} + +static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) +{ + word->weight=LWS_IN_USE; + docstat->sum+=word->weight; + memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD)); + return 0; +} + +/* transforms tree of words into the array, applying normalization */ + +FT_WORD * maria_ft_linearize(TREE *wtree) +{ + FT_WORD *wlist,*p; + FT_DOCSTAT docstat; + DBUG_ENTER("maria_ft_linearize"); + + if ((wlist=(FT_WORD *) my_malloc(sizeof(FT_WORD)* + (1+wtree->elements_in_tree),MYF(0)))) + { + docstat.list=wlist; + docstat.uniq=wtree->elements_in_tree; + docstat.sum=0; + tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right); + } + delete_tree(wtree); + if (!wlist) + DBUG_RETURN(NULL); + + docstat.list->pos=NULL; + + for (p=wlist;p->pos;p++) + { + p->weight=PRENORM_IN_USE; + } + + for (p=wlist;p->pos;p++) + { + p->weight/=NORM_IN_USE; + } + + DBUG_RETURN(wlist); +} + +my_bool maria_ft_boolean_check_syntax_string(const byte *str) +{ + uint i, j; + + if (!str || + (strlen(str)+1 != sizeof(ft_boolean_syntax)) || + (str[0] != ' ' && str[1] != ' ')) + return 1; + for (i=0; i 127 || + my_isalnum(default_charset_info, str[i])) + return 1; + for (j=0; jyesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + param->type= FT_TOKEN_EOF; + + while (docquot) + { + param->quot=doc; + *start=doc+1; + param->type= FT_TOKEN_RIGHT_PAREN; + goto ret; + } + if (!param->quot) + { + if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) + { + /* param->prev=' '; */ + *start=doc+1; + if (*doc == FTB_LQUOT) param->quot=*start; + param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN); + goto ret; + } + if (param->prev == ' ') + { + if (*doc == FTB_YES ) { param->yesno=+1; continue; } else + if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else + if (*doc == FTB_NO ) { param->yesno=-1; continue; } else + if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else + if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else + if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; } + } + } + param->prev=*doc; + param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + } + + mwc=length=0; + for (word->pos=doc; docprev='A'; /* be sure *prev is true_word_char */ + word->len= (uint)(doc-word->pos) - mwc; + if ((param->trunc=(doc= ft_min_word_len && !is_stopword(word->pos, word->len)) + || param->trunc) && length < ft_max_word_len) + { + *start=doc; + param->type= FT_TOKEN_WORD; + goto ret; + } + else if (length) /* make sure length > 0 (if start contains spaces only) */ + { + *start= doc; + param->type= FT_TOKEN_STOPWORD; + goto ret; + } + } + if (param->quot) + { + param->quot=*start=doc; + param->type= 3; /* FT_RBR */ + goto ret; + } +ret: + return param->type; +} + +byte maria_ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, + FT_WORD *word, my_bool skip_stopwords) +{ + byte *doc= *start; + uint mwc, length, mbl; + DBUG_ENTER("maria_ft_simple_get_word"); + + do + { + for (;; doc++) + { + if (doc >= end) DBUG_RETURN(0); + if (true_word_char(cs, *doc)) break; + } + + mwc= length= 0; + for (word->pos=doc; doclen= (uint)(doc-word->pos) - mwc; + + if (skip_stopwords == FALSE || + (length >= ft_min_word_len && length < ft_max_word_len && + !is_stopword(word->pos, word->len))) + { + *start= doc; + DBUG_RETURN(1); + } + } while (doc < end); + DBUG_RETURN(0); +} + +void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs) +{ + DBUG_ENTER("maria_ft_parse_init"); + if (!is_tree_inited(wtree)) + init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs); + DBUG_VOID_RETURN; +} + + +static int maria_ft_add_word(void *param, byte *word, uint word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + TREE *wtree; + FT_WORD w; + DBUG_ENTER("maria_ft_add_word"); + wtree= ((MY_FT_PARSER_PARAM *)param)->wtree; + if (((MY_FT_PARSER_PARAM *)param)->with_alloc) + { + byte *ptr; + /* allocating the data in the tree - to avoid mallocs and frees */ + DBUG_ASSERT(wtree->with_delete == 0); + ptr= (byte *)alloc_root(&wtree->mem_root, word_len); + memcpy(ptr, word, word_len); + w.pos= ptr; + } + else + w.pos= word; + w.len= word_len; + if (!tree_insert(wtree, &w, 0, wtree->custom_arg)) + { + delete_tree(wtree); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +static int maria_ft_parse_internal(void *param, byte *doc, uint doc_len) +{ + byte *end=doc+doc_len; + FT_WORD w; + TREE *wtree; + DBUG_ENTER("maria_ft_parse_internal"); + + wtree= ((MY_FT_PARSER_PARAM *)param)->wtree; + while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) + if (maria_ft_add_word(param, w.pos, w.len, 0)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +int maria_ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc, + struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *param) +{ + MY_FT_PARSER_PARAM my_param; + DBUG_ENTER("maria_ft_parse"); + DBUG_ASSERT(parser); + my_param.wtree= wtree; + my_param.with_alloc= with_alloc; + + param->mysql_parse= maria_ft_parse_internal; + param->mysql_add_word= maria_ft_add_word; + param->mysql_ftparam= &my_param; + param->cs= wtree->custom_arg; + param->doc= doc; + param->length= doclen; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + DBUG_RETURN(parser->parse(param)); +} + + +MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, uint keynr) +{ + uint32 ftparser_nr; + struct st_mysql_ftparser *parser; + if (! info->ftparser_param) + { + /* info->ftparser_param can not be zero after the initialization, + because it always includes built-in fulltext parser. And built-in + parser can be called even if the table has no fulltext indexes and + no varchar/text fields. */ + if (! info->s->ftparsers) + { + /* It's ok that modification to shared structure is done w/o mutex + locks, because all threads would set the same variables to the + same values. */ + uint i, j, keys= info->s->state.header.keys, ftparsers= 1; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + if (keyinfo->flag & HA_FULLTEXT) + { + for (j= 0;; j++) + { + if (j == i) + { + keyinfo->ftparser_nr= ftparsers++; + break; + } + if (info->s->keyinfo[j].flag & HA_FULLTEXT && + keyinfo->parser == info->s->keyinfo[j].parser) + { + keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr; + break; + } + } + } + } + info->s->ftparsers= ftparsers; + } + info->ftparser_param= (MYSQL_FTPARSER_PARAM *) + my_malloc(sizeof(MYSQL_FTPARSER_PARAM) * + info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL)); + if (! info->ftparser_param) + return 0; + } + if (keynr == NO_SUCH_KEY) + { + ftparser_nr= 0; + parser= &ft_default_parser; + } + else + { + ftparser_nr= info->s->keyinfo[keynr].ftparser_nr; + parser= info->s->keyinfo[keynr].parser; + } + if (! info->ftparser_param[ftparser_nr].mysql_add_word) + { + /* Note, that mysql_add_word is used here as a flag: + mysql_add_word == 0 - parser is not initialized + mysql_add_word != 0 - parser is initialized, or no + initialization needed. */ + info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1; + if (parser->init && parser->init(&info->ftparser_param[ftparser_nr])) + return 0; + } + return &info->ftparser_param[ftparser_nr]; +} + + +void maria_ftparser_call_deinitializer(MARIA_HA *info) +{ + uint i, keys= info->s->state.header.keys; + if (! info->ftparser_param) + return; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + MYSQL_FTPARSER_PARAM *ftparser_param= + &info->ftparser_param[keyinfo->ftparser_nr]; + if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word) + { + if (keyinfo->parser->deinit) + keyinfo->parser->deinit(ftparser_param); + ftparser_param->mysql_add_word= 0; + } + } +} diff --git a/storage/maria/ma_ft_stem.c b/storage/maria/ma_ft_stem.c new file mode 100644 index 00000000000..7a2f8cfd7c5 --- /dev/null +++ b/storage/maria/ma_ft_stem.c @@ -0,0 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* mulitingual stem */ diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c new file mode 100644 index 00000000000..2880f6bcdc1 --- /dev/null +++ b/storage/maria/ma_ft_test1.c @@ -0,0 +1,317 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_test1.h" +#include + +static int key_field=FIELD_VARCHAR,extra_field=FIELD_SKIP_ENDSPACE; +static uint key_length=200,extra_length=50; +static int key_type=HA_KEYTYPE_TEXT; +static int verbose=0,silent=0,skip_update=0, + no_keys=0,no_stopwords=0,no_search=0,no_fulltext=0; +static int create_flag=0,error=0; + +#define MAX_REC_LENGTH 300 +static char record[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_record(char *, int); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 's', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'N', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'K', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'F', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'U', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MY_INIT(argv[0]); + + get_options(argc,argv); + maria_init(); + + exit(run_test("FT1")); +} + +static MARIA_COLUMNDEF recinfo[3]; +static MARIA_KEYDEF keyinfo[2]; +static HA_KEYSEG keyseg[10]; + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j; + my_off_t pos; + + bzero((char*) recinfo,sizeof(recinfo)); + + /* First define 2 columns */ + recinfo[0].type=extra_field; + recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : + extra_length); + if (extra_field == FIELD_VARCHAR) + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length); + recinfo[1].type=key_field; + recinfo[1].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length); + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART: + (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT); + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,(no_keys?0:1),keyinfo,2,recinfo,0,NULL, + (MARIA_CREATE_INFO*) 0, create_flag)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + + if (!silent) + printf("- %s stopwords\n",no_stopwords?"Skipping":"Initializing"); + maria_ft_init_stopwords(no_stopwords?NULL:maria_ft_precompiled_stopwords); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + for (i=NUPD ; indocs); + for (j=0;j<5;j++) + { + double w; int err; + err= maria_ft_nlq_read_next(result, read_record); + if (err==HA_ERR_END_OF_FILE) + { + printf("No more matches!\n"); + break; + } + else if (err) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + break; + } + w=maria_ft_nlq_get_relevance(result); + if (key_field == FIELD_VARCHAR) + { + uint l; + char *p; + p=recinfo[0].length+read_record; + l=uint2korr(p); + printf("%10.7f: %.*s\n",w,(int) l,p+2); + } + else + printf("%10.7f: %.*s\n",w,recinfo[1].length, + recinfo[0].length+read_record); + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + +static char blob_key[MAX_REC_LENGTH]; +/* static char blob_record[MAX_REC_LENGTH+20*20]; */ + +void create_record(char *pos, int n) +{ + bzero((char*) pos,MAX_REC_LENGTH); + if (recinfo[0].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(pos+pack_length); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[0].length; + } + else + { + strnmov(pos,data[n].f0,keyinfo[0].seg[0].length); + pos+=recinfo[0].length; + } + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[1].length; + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(pos+1); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[1].length; + } + else + { + strnmov(pos,data[n].f2,keyinfo[0].seg[0].length); + pos+=recinfo[1].length; + } +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'v': verbose=1; break; + case 's': silent=1; break; + case 'F': no_fulltext=1; no_search=1; + case 'U': skip_update=1; break; + case 'K': no_keys=no_search=1; break; + case 'N': no_search=1; break; + case 'S': no_stopwords=1; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + +/* Read options */ + +static void get_options(int argc,char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + return; +} /* get options */ + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_test1.h b/storage/maria/ma_ft_test1.h new file mode 100644 index 00000000000..9449f063125 --- /dev/null +++ b/storage/maria/ma_ft_test1.h @@ -0,0 +1,421 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#define NUPD 20 +#define NDATAS 389 +struct { const char *f0, *f2; } data[NDATAS] = { + {"1", "General Information about MySQL"}, + {"1.1", "What is MySQL?"}, + {"1.2", "About this manual"}, + {"1.3", "History of MySQL"}, + {"1.4", "The main features of MySQL"}, + {"1.5", "General SQL information and tutorials"}, + {"1.6", "Useful MySQL-related links"}, + {"1.7", "What are stored procedures and triggers and so on?"}, + {"2", "MySQL mailing lists and how to ask questions/give error (bug) reports"}, + {"2.1", "Subscribing to/un-subscribing from the MySQL mailing list"}, + {"2.2", "Asking questions or reporting bugs"}, + {"2.3", "I think I have found a bug. What information do you need to help me?"}, + {"2.3.1", "MySQL keeps crashing"}, + {"2.4", "Guidelines for answering questions on the mailing list"}, + {"3", "Licensing or When do I have/want to pay for MySQL?"}, + {"3.1", "How much does MySQL cost?"}, + {"3.2", "How do I get commercial support?"}, + {"3.2.1", "Types of commercial support"}, + {"3.2.1.1", "Basic email support"}, + {"3.2.1.2", "Extended email support"}, +/*------------------------------- NUPD=20 -------------------------------*/ + {"3.2.1.3", "Asking: Login support"}, + {"3.2.1.4", "Extended login support"}, + {"3.3", "How do I pay for licenses/support?"}, + {"3.4", "Who do I contact when I want more information about licensing/support?"}, + {"3.5", "What Copyright does MySQL use?"}, + {"3.6", "When may I distribute MySQL commercially without a fee?"}, + {"3.7", "I want to sell a product that can be configured to use MySQL"}, + {"3.8", "I am running a commercial web server using MySQL"}, + {"3.9", "Do I need a license to sell commercial Perl/tcl/PHP/Web+ etc applications?"}, + {"3.10", "Possible future changes in the licensing"}, + {"4", "Compiling and installing MySQL"}, + {"4.1", "How do I get MySQL?"}, + {"4.2", "Which MySQL version should I use?"}, + {"4.3", "How/when will you release updates?"}, + {"4.4", "What operating systems does MySQL support?"}, + {"4.5", "Compiling MySQL from source code"}, + {"4.5.1", "Quick installation overview"}, + {"4.5.2", "Usual configure switches"}, + {"4.5.3", "Applying a patch"}, + {"4.6", "Problems compiling?"}, + {"4.7", "General compilation notes"}, + {"4.8", "MIT-pthreads notes (FreeBSD)"}, + {"4.9", "Perl installation comments"}, + {"4.10", "Special things to consider for some machine/OS combinations"}, + {"4.10.1", "Solaris notes"}, + {"4.10.2", "SunOS 4 notes"}, + {"4.10.3", "Linux notes for all versions"}, + {"4.10.3.1", "Linux-x86 notes"}, + {"4.10.3.2", "RedHat 5.0"}, + {"4.10.3.3", "RedHat 5.1"}, + {"4.10.3.4", "Linux-Sparc notes"}, + {"4.10.3.5", "Linux-Alpha notes"}, + {"4.10.3.6", "MkLinux notes"}, + {"4.10.4", "Alpha-DEC-Unix notes"}, + {"4.10.5", "Alpha-DEC-OSF1 notes"}, + {"4.10.6", "SGI-IRIX notes"}, + {"4.10.7", "FreeBSD notes"}, + {"4.10.7.1", "FreeBSD-3.0 notes"}, + {"4.10.8", "BSD/OS 2.# notes"}, + {"4.10.8.1", "BSD/OS 3.# notes"}, + {"4.10.9", "SCO notes"}, + {"4.10.10", "SCO Unixware 7.0 notes"}, + {"4.10.11", "IBM-AIX notes"}, + {"4.10.12", "HP-UX notes"}, + {"4.11", "TcX binaries"}, + {"4.12", "Win32 notes"}, + {"4.13", "Installation instructions for MySQL binary releases"}, + {"4.13.1", "How to get MySQL Perl support working"}, + {"4.13.2", "Linux notes"}, + {"4.13.3", "HP-UX notes"}, + {"4.13.4", "Linking client libraries"}, + {"4.14", "Problems running mysql_install_db"}, + {"4.15", "Problems starting MySQL"}, + {"4.16", "Automatic start/stop of MySQL"}, + {"4.17", "Option files"}, + {"5", "How standards-compatible is MySQL?"}, + {"5.1", "What extensions has MySQL to ANSI SQL92?"}, + {"5.2", "What functionality is missing in MySQL?"}, + {"5.2.1", "Sub-selects"}, + {"5.2.2", "SELECT INTO TABLE"}, + {"5.2.3", "Transactions"}, + {"5.2.4", "Triggers"}, + {"5.2.5", "Foreign Keys"}, + {"5.2.5.1", "Some reasons NOT to use FOREIGN KEYS"}, + {"5.2.6", "Views"}, + {"5.2.7", "-- as start of a comment"}, + {"5.3", "What standards does MySQL follow?"}, + {"5.4", "What functions exist only for compatibility?"}, + {"5.5", "Limitations of BLOB and TEXT types"}, + {"5.6", "How to cope without COMMIT-ROLLBACK"}, + {"6", "The MySQL access privilege system"}, + {"6.1", "What the privilege system does"}, + {"6.2", "Connecting to the MySQL server"}, + {"6.2.1", "Keeping your password secure"}, + {"6.3", "Privileges provided by MySQL"}, + {"6.4", "How the privilege system works"}, + {"6.5", "The privilege tables"}, + {"6.6", "Setting up the initial MySQL privileges"}, + {"6.7", "Adding new user privileges to MySQL"}, + {"6.8", "An example permission setup"}, + {"6.9", "Causes of Access denied errors"}, + {"6.10", "How to make MySQL secure against crackers"}, + {"7", "MySQL language reference"}, + {"7.1", "Literals: how to write strings and numbers"}, + {"7.1.1", "Strings"}, + {"7.1.2", "Numbers"}, + {"7.1.3", "NULL values"}, + {"7.1.4", "Database, table, index, column and alias names"}, + {"7.1.4.1", "Case sensitivity in names"}, + {"7.2", "Column types"}, + {"7.2.1", "Column type storage requirements"}, + {"7.2.5", "Numeric types"}, + {"7.2.6", "Date and time types"}, + {"7.2.6.1", "The DATE type"}, + {"7.2.6.2", "The TIME type"}, + {"7.2.6.3", "The DATETIME type"}, + {"7.2.6.4", "The TIMESTAMP type"}, + {"7.2.6.5", "The YEAR type"}, + {"7.2.6.6", "Miscellaneous date and time properties"}, + {"7.2.7", "String types"}, + {"7.2.7.1", "The CHAR and VARCHAR types"}, + {"7.2.7.2", "The BLOB and TEXT types"}, + {"7.2.7.3", "The ENUM type"}, + {"7.2.7.4", "The SET type"}, + {"7.2.8", "Choosing the right type for a column"}, + {"7.2.9", "Column indexes"}, + {"7.2.10", "Multiple-column indexes"}, + {"7.2.11", "Using column types from other database engines"}, + {"7.3", "Functions for use in SELECT and WHERE clauses"}, + {"7.3.1", "Grouping functions"}, + {"7.3.2", "Normal arithmetic operations"}, + {"7.3.3", "Bit functions"}, + {"7.3.4", "Logical operations"}, + {"7.3.5", "Comparison operators"}, + {"7.3.6", "String comparison functions"}, + {"7.3.7", "Control flow functions"}, + {"7.3.8", "Mathematical functions"}, + {"7.3.9", "String functions"}, + {"7.3.10", "Date and time functions"}, + {"7.3.11", "Miscellaneous functions"}, + {"7.3.12", "Functions for use with GROUP BY clauses"}, + {"7.4", "CREATE DATABASE syntax"}, + {"7.5", "DROP DATABASE syntax"}, + {"7.6", "CREATE TABLE syntax"}, + {"7.7", "ALTER TABLE syntax"}, + {"7.8", "OPTIMIZE TABLE syntax"}, + {"7.9", "DROP TABLE syntax"}, + {"7.10", "DELETE syntax"}, + {"7.11", "SELECT syntax"}, + {"7.12", "JOIN syntax"}, + {"7.13", "INSERT syntax"}, + {"7.14", "REPLACE syntax"}, + {"7.15", "LOAD DATA INFILE syntax"}, + {"7.16", "UPDATE syntax"}, + {"7.17", "USE syntax"}, + {"7.18", "SHOW syntax (Get information about tables, columns...)"}, + {"7.19", "EXPLAIN syntax (Get information about a SELECT)"}, + {"7.20", "DESCRIBE syntax (Get information about columns)"}, + {"7.21", "LOCK TABLES/UNLOCK TABLES syntax"}, + {"7.22", "SET OPTION syntax"}, + {"7.23", "GRANT syntax (Compatibility function)"}, + {"7.24", "CREATE INDEX syntax (Compatibility function)"}, + {"7.25", "DROP INDEX syntax (Compatibility function)"}, + {"7.26", "Comment syntax"}, + {"7.27", "CREATE FUNCTION/DROP FUNCTION syntax"}, + {"7.28", "Is MySQL picky about reserved words?"}, + {"8", "Example SQL queries"}, + {"8.1", "Queries from twin project"}, + {"8.1.1", "Find all non-distributed twins"}, + {"8.1.2", "Show a table on twin pair status"}, + {"9", "How safe/stable is MySQL?"}, + {"9.1", "How stable is MySQL?"}, + {"9.2", "Why are there is so many releases of MySQL?"}, + {"9.3", "Checking a table for errors"}, + {"9.4", "How to repair tables"}, + {"9.5", "Is there anything special to do when upgrading/downgrading MySQL?"}, + {"9.5.1", "Upgrading from a 3.21 version to 3.22"}, + {"9.5.2", "Upgrading from a 3.20 version to 3.21"}, + {"9.5.3", "Upgrading to another architecture"}, + {"9.6", "Year 2000 compliance"}, + {"10", "MySQL Server functions"}, + {"10.1", "What languages are supported by MySQL?"}, + {"10.1.1", "Character set used for data & sorting"}, + {"10.2", "The update log"}, + {"10.3", "How big can MySQL tables be?"}, + {"11", "Getting maximum performance from MySQL"}, + {"11.1", "How does one change the size of MySQL buffers?"}, + {"11.2", "How compiling and linking affects the speed of MySQL"}, + {"11.3", "How does MySQL use memory?"}, + {"11.4", "How does MySQL use indexes?"}, + {"11.5", "What optimizations are done on WHERE clauses?"}, + {"11.6", "How does MySQL open & close tables?"}, + {"11.6.0.1", "What are the drawbacks of creating possibly thousands of tables in a database?"}, + {"11.7", "How does MySQL lock tables?"}, + {"11.8", "How should I arrange my table to be as fast/small as possible?"}, + {"11.9", "What affects the speed of INSERT statements?"}, + {"11.10", "What affects the speed DELETE statements?"}, + {"11.11", "How do I get MySQL to run at full speed?"}, + {"11.12", "What are the different row formats? Or, when should VARCHAR/CHAR be used?"}, + {"11.13", "Why so many open tables?"}, + {"12", "MySQL benchmark suite"}, + {"13", "MySQL Utilites"}, + {"13.1", "Overview of the different MySQL programs"}, + {"13.2", "The MySQL table check, optimize and repair program"}, + {"13.2.1", "isamchk memory use"}, + {"13.2.2", "Getting low-level table information"}, + {"13.3", "The MySQL compressed read-only table generator"}, + {"14", "Adding new functions to MySQL"}, + {"15", "MySQL ODBC Support"}, + {"15.1", "Operating systems supported by MyODBC"}, + {"15.2", "How to report problems with MyODBC"}, + {"15.3", "Programs known to work with MyODBC"}, + {"15.4", "How to fill in the various fields in the ODBC administrator program"}, + {"15.5", "How to get the value of an AUTO_INCREMENT column in ODBC"}, + {"16", "Problems and common errors"}, + {"16.1", "Some common errors when using MySQL"}, + {"16.1.1", "MySQL server has gone away error"}, + {"16.1.2", "Can't connect to local MySQL server error"}, + {"16.1.3", "Out of memory error"}, + {"16.1.4", "Packet too large error"}, + {"16.1.5", "The table is full error"}, + {"16.1.6", "Commands out of sync error in client"}, + {"16.1.7", "Removing user error"}, + {"16.2", "How MySQL handles a full disk"}, + {"16.3", "How to run SQL commands from a text file"}, + {"16.4", "Where MySQL stores temporary files"}, + {"16.5", "Access denied error"}, + {"16.6", "How to run MySQL as a normal user"}, + {"16.7", "Problems with file permissions"}, + {"16.8", "File not found"}, + {"16.9", "Problems using DATE columns"}, + {"16.10", "Case sensitivity in searches"}, + {"16.11", "Problems with NULL values"}, + {"17", "Solving some common problems with MySQL"}, + {"17.1", "Database replication"}, + {"17.2", "Database backups"}, + {"18", "MySQL client tools and API's"}, + {"18.1", "MySQL C API"}, + {"18.2", "C API datatypes"}, + {"18.3", "C API function overview"}, + {"18.4", "C API function descriptions"}, + {"18.4.1", "mysql_affected_rows()"}, + {"18.4.2", "mysql_close()"}, + {"18.4.3", "mysql_connect()"}, + {"18.4.4", "mysql_create_db()"}, + {"18.4.5", "mysql_data_seek()"}, + {"18.4.6", "mysql_debug()"}, + {"18.4.7", "mysql_drop_db()"}, + {"18.4.8", "mysql_dump_debug_info()"}, + {"18.4.9", "mysql_eof()"}, + {"18.4.10", "mysql_errno()"}, + {"18.4.11", "mysql_error()"}, + {"18.4.12", "mysql_escape_string()"}, + {"18.4.13", "mysql_fetch_field()"}, + {"18.4.14", "mysql_fetch_fields()"}, + {"18.4.15", "mysql_fetch_field_direct()"}, + {"18.4.16", "mysql_fetch_lengths()"}, + {"18.4.17", "mysql_fetch_row()"}, + {"18.4.18", "mysql_field_seek()"}, + {"18.4.19", "mysql_field_tell()"}, + {"18.4.20", "mysql_free_result()"}, + {"18.4.21", "mysql_get_client_info()"}, + {"18.4.22", "mysql_get_host_info()"}, + {"18.4.23", "mysql_get_proto_info()"}, + {"18.4.24", "mysql_get_server_info()"}, + {"18.4.25", "mysql_info()"}, + {"18.4.26", "mysql_init()"}, + {"18.4.27", "mysql_insert_id()"}, + {"18.4.28", "mysql_kill()"}, + {"18.4.29", "mysql_list_dbs()"}, + {"18.4.30", "mysql_list_fields()"}, + {"18.4.31", "mysql_list_processes()"}, + {"18.4.32", "mysql_list_tables()"}, + {"18.4.33", "mysql_num_fields()"}, + {"18.4.34", "mysql_num_rows()"}, + {"18.4.35", "mysql_query()"}, + {"18.4.36", "mysql_real_connect()"}, + {"18.4.37", "mysql_real_query()"}, + {"18.4.38", "mysql_reload()"}, + {"18.4.39", "mysql_row_tell()"}, + {"18.4.40", "mysql_select_db()"}, + {"18.4.41", "mysql_shutdown()"}, + {"18.4.42", "mysql_stat()"}, + {"18.4.43", "mysql_store_result()"}, + {"18.4.44", "mysql_thread_id()"}, + {"18.4.45", "mysql_use_result()"}, + {"18.4.46", "Why is it that after mysql_query() returns success, mysql_store_result() sometimes returns NULL?"}, + {"18.4.47", "What results can I get from a query?"}, + {"18.4.48", "How can I get the unique ID for the last inserted row?"}, + {"18.4.49", "Problems linking with the C API"}, + {"18.4.50", "How to make a thread-safe client"}, + {"18.5", "MySQL Perl API's"}, + {"18.5.1", "DBI with DBD::mysql"}, + {"18.5.1.1", "The DBI interface"}, + {"18.5.1.2", "More DBI/DBD information"}, + {"18.6", "MySQL Java connectivity (JDBC)"}, + {"18.7", "MySQL PHP API's"}, + {"18.8", "MySQL C++ API's"}, + {"18.9", "MySQL Python API's"}, + {"18.10", "MySQL TCL API's"}, + {"19", "How MySQL compares to other databases"}, + {"19.1", "How MySQL compares to mSQL"}, + {"19.1.1", "How to convert mSQL tools for MySQL"}, + {"19.1.2", "How mSQL and MySQL client/server communications protocols differ"}, + {"19.1.3", "How mSQL 2.0 SQL syntax differs from MySQL"}, + {"19.2", "How MySQL compares to PostgreSQL"}, + {"A", "Some users of MySQL"}, + {"B", "Contributed programs"}, + {"C", "Contributors to MySQL"}, + {"D", "MySQL change history"}, + {"19.3", "Changes in release 3.22.x (Alpha version)"}, + {"19.3.1", "Changes in release 3.22.7"}, + {"19.3.2", "Changes in release 3.22.6"}, + {"19.3.3", "Changes in release 3.22.5"}, + {"19.3.4", "Changes in release 3.22.4"}, + {"19.3.5", "Changes in release 3.22.3"}, + {"19.3.6", "Changes in release 3.22.2"}, + {"19.3.7", "Changes in release 3.22.1"}, + {"19.3.8", "Changes in release 3.22.0"}, + {"19.4", "Changes in release 3.21.x"}, + {"19.4.1", "Changes in release 3.21.33"}, + {"19.4.2", "Changes in release 3.21.32"}, + {"19.4.3", "Changes in release 3.21.31"}, + {"19.4.4", "Changes in release 3.21.30"}, + {"19.4.5", "Changes in release 3.21.29"}, + {"19.4.6", "Changes in release 3.21.28"}, + {"19.4.7", "Changes in release 3.21.27"}, + {"19.4.8", "Changes in release 3.21.26"}, + {"19.4.9", "Changes in release 3.21.25"}, + {"19.4.10", "Changes in release 3.21.24"}, + {"19.4.11", "Changes in release 3.21.23"}, + {"19.4.12", "Changes in release 3.21.22"}, + {"19.4.13", "Changes in release 3.21.21a"}, + {"19.4.14", "Changes in release 3.21.21"}, + {"19.4.15", "Changes in release 3.21.20"}, + {"19.4.16", "Changes in release 3.21.19"}, + {"19.4.17", "Changes in release 3.21.18"}, + {"19.4.18", "Changes in release 3.21.17"}, + {"19.4.19", "Changes in release 3.21.16"}, + {"19.4.20", "Changes in release 3.21.15"}, + {"19.4.21", "Changes in release 3.21.14b"}, + {"19.4.22", "Changes in release 3.21.14a"}, + {"19.4.23", "Changes in release 3.21.13"}, + {"19.4.24", "Changes in release 3.21.12"}, + {"19.4.25", "Changes in release 3.21.11"}, + {"19.4.26", "Changes in release 3.21.10"}, + {"19.4.27", "Changes in release 3.21.9"}, + {"19.4.28", "Changes in release 3.21.8"}, + {"19.4.29", "Changes in release 3.21.7"}, + {"19.4.30", "Changes in release 3.21.6"}, + {"19.4.31", "Changes in release 3.21.5"}, + {"19.4.32", "Changes in release 3.21.4"}, + {"19.4.33", "Changes in release 3.21.3"}, + {"19.4.34", "Changes in release 3.21.2"}, + {"19.4.35", "Changes in release 3.21.0"}, + {"19.5", "Changes in release 3.20.x"}, + {"19.5.1", "Changes in release 3.20.18"}, + {"19.5.2", "Changes in release 3.20.17"}, + {"19.5.3", "Changes in release 3.20.16"}, + {"19.5.4", "Changes in release 3.20.15"}, + {"19.5.5", "Changes in release 3.20.14"}, + {"19.5.6", "Changes in release 3.20.13"}, + {"19.5.7", "Changes in release 3.20.11"}, + {"19.5.8", "Changes in release 3.20.10"}, + {"19.5.9", "Changes in release 3.20.9"}, + {"19.5.10", "Changes in release 3.20.8"}, + {"19.5.11", "Changes in release 3.20.7"}, + {"19.5.12", "Changes in release 3.20.6"}, + {"19.5.13", "Changes in release 3.20.3"}, + {"19.5.14", "Changes in release 3.20.0"}, + {"19.6", "Changes in release 3.19.x"}, + {"19.6.1", "Changes in release 3.19.5"}, + {"19.6.2", "Changes in release 3.19.4"}, + {"19.6.3", "Changes in release 3.19.3"}, + {"E", "Known errors and design deficiencies in MySQL"}, + {"F", "List of things we want to add to MySQL in the future (The TODO)"}, + {"19.7", "Things that must done in the real near future"}, + {"19.8", "Things that have to be done sometime"}, + {"19.9", "Some things we don't have any plans to do"}, + {"G", "Comments on porting to other systems"}, + {"19.10", "Debugging MySQL"}, + {"19.11", "Comments about RTS threads"}, + {"19.12", "What is the difference between different thread packages?"}, + {"H", "Description of MySQL regular expression syntax"}, + {"I", "What is Unireg?"}, + {"J", "The MySQL server license"}, + {"K", "The MySQL license for Microsoft operating systems"}, + {"*", "SQL command, type and function index"}, + {"*", "Concept Index"} +}; + +#define NQUERIES 5 +const char *query[NQUERIES]={ + "mysql information and manual", + "upgrading from previous version", + "column indexes", + "against about after more right the with/without", /* stopwords test */ + "mysql license and copyright" +}; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c new file mode 100644 index 00000000000..c9e2112578c --- /dev/null +++ b/storage/maria/ma_ft_update.c @@ -0,0 +1,359 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* functions to work with full-text indices */ + +#include "ma_ftdefs.h" +#include + +void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const byte *record, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_init"); + + ftsi->num=info->s->keyinfo[keynr].keysegs; + ftsi->seg=info->s->keyinfo[keynr].seg; + ftsi->rec=record; + DBUG_VOID_RETURN; +} + +void _ma_ft_segiterator_dummy_init(const byte *record, uint len, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_dummy_init"); + + ftsi->num=1; + ftsi->seg=0; + ftsi->pos=record; + ftsi->len=len; + DBUG_VOID_RETURN; +} + +/* + This function breaks convention "return 0 in success" + but it's easier to use like this + + while(_ma_ft_segiterator()) + + so "1" means "OK", "0" means "EOF" +*/ + +uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator"); + + if (!ftsi->num) + DBUG_RETURN(0); + + ftsi->num--; + if (!ftsi->seg) + DBUG_RETURN(1); + + ftsi->seg--; + + if (ftsi->seg->null_bit && + (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) + { + ftsi->pos=0; + DBUG_RETURN(1); + } + ftsi->pos= ftsi->rec+ftsi->seg->start; + if (ftsi->seg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= (ftsi->seg->bit_start); + ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos : + uint2korr(ftsi->pos)); + ftsi->pos+= pack_length; /* Skip VARCHAR length */ + DBUG_RETURN(1); + } + if (ftsi->seg->flag & HA_BLOB_PART) + { + ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); + memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, + sizeof(char*)); + DBUG_RETURN(1); + } + ftsi->len=ftsi->seg->length; + DBUG_RETURN(1); +} + + +/* parses a document i.e. calls maria_ft_parse for every keyseg */ + +uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, + const byte *record, my_bool with_alloc, + MYSQL_FTPARSER_PARAM *param) +{ + FT_SEG_ITERATOR ftsi; + struct st_mysql_ftparser *parser; + DBUG_ENTER("_ma_ft_parse"); + + _ma_ft_segiterator_init(info, keynr, record, &ftsi); + + maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); + parser= info->s->keyinfo[keynr].parser; + while (_ma_ft_segiterator(&ftsi)) + { + if (ftsi.pos) + if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc, parser, + param)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record) +{ + TREE ptree; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ma_ft_parserecord"); + if (! (param= maria_ftparser_call_initializer(info, keynr))) + DBUG_RETURN(NULL); + bzero((char*) &ptree, sizeof(ptree)); + if (_ma_ft_parse(&ptree, info, keynr, record, 0, param)) + DBUG_RETURN(NULL); + + DBUG_RETURN(maria_ft_linearize(&ptree)); +} + +static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length; + DBUG_ENTER("_ma_ft_store"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_write(info,keynr,(uchar*) keybuf,key_length)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +static int _ma_ft_erase(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length, err=0; + DBUG_ENTER("_ma_ft_erase"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_delete(info,keynr,(uchar*) keybuf,key_length)) + err=1; + } + DBUG_RETURN(err); +} + +/* + Compares an appropriate parts of two WORD_KEY keys directly out of records + returns 1 if they are different +*/ + +#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1 +#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL 0 + +int _ma_ft_cmp(MARIA_HA *info, uint keynr, const byte *rec1, const byte *rec2) +{ + FT_SEG_ITERATOR ftsi1, ftsi2; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + DBUG_ENTER("_ma_ft_cmp"); +#ifndef MYSQL_HAS_TRUE_CTYPE_IMPLEMENTATION + if (cs->mbmaxlen > 1) + DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); +#endif + + _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1); + _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2); + + while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2)) + { + if ((ftsi1.pos != ftsi2.pos) && + (!ftsi1.pos || !ftsi2.pos || + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + (uchar*) ftsi2.pos,ftsi2.len,0,0))) + DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); + } + DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); +} + + +/* update a document entry */ + +int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, my_off_t pos) +{ + int error= -1; + FT_WORD *oldlist,*newlist, *old_word, *new_word; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + uint key_length; + int cmp, cmp2; + DBUG_ENTER("_ma_ft_update"); + + if (!(old_word=oldlist= _ma_ft_parserecord(info, keynr, oldrec))) + goto err0; + if (!(new_word=newlist= _ma_ft_parserecord(info, keynr, newrec))) + goto err1; + + error=0; + while(old_word->pos && new_word->pos) + { + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, + (uchar*) new_word->pos,new_word->len,0,0); + cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); + + if (cmp < 0 || cmp2) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos); + if ((error= _ma_ck_delete(info,keynr,(uchar*) keybuf,key_length))) + goto err2; + } + if (cmp > 0 || cmp2) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,new_word,pos); + if ((error= _ma_ck_write(info,keynr,(uchar*) keybuf,key_length))) + goto err2; + } + if (cmp<=0) old_word++; + if (cmp>=0) new_word++; + } + if (old_word->pos) + error= _ma_ft_erase(info,keynr,keybuf,old_word,pos); + else if (new_word->pos) + error= _ma_ft_store(info,keynr,keybuf,new_word,pos); + +err2: + my_free((char*) newlist,MYF(0)); +err1: + my_free((char*) oldlist,MYF(0)); +err0: + DBUG_RETURN(error); +} + + +/* adds a document to the collection */ + +int _ma_ft_add(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_add"); + + if ((wlist= _ma_ft_parserecord(info, keynr, record))) + { + error= _ma_ft_store(info,keynr,keybuf,wlist,pos); + my_free((char*) wlist,MYF(0)); + } + DBUG_RETURN(error); +} + + +/* removes a document from the collection */ + +int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_del"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist= _ma_ft_parserecord(info, keynr, record))) + { + error= _ma_ft_erase(info,keynr,keybuf,wlist,pos); + my_free((char*) wlist,MYF(0)); + } + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} + +uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, + my_off_t filepos) +{ + byte buf[HA_FT_MAXBYTELEN+16]; + DBUG_ENTER("_ma_ft_make_key"); + +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + { + float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight); + mi_float4store(buf,weight); + } +#else +#error +#endif + + int2store(buf+HA_FT_WLEN,wptr->len); + memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); + DBUG_RETURN(_ma_make_key(info,keynr,(uchar*) keybuf,buf,filepos)); +} + + +/* + convert key value to ft2 +*/ + +uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key) +{ + my_off_t root; + DYNAMIC_ARRAY *da=info->ft1_to_ft2; + MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo; + uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end; + uint length, key_length; + DBUG_ENTER("_ma_ft_convert_to_ft2"); + + /* we'll generate one pageful at once, and insert the rest one-by-one */ + /* calculating the length of this page ...*/ + length=(keyinfo->block_length-2) / keyinfo->keylength; + set_if_smaller(length, da->elements); + length=length * keyinfo->keylength; + + get_key_full_length_rdonly(key_length, key); + while (_ma_ck_delete(info, keynr, key, key_length) == 0) + { + /* + nothing to do here. + _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */ + } + + /* creating pageful of keys */ + maria_putint(info->buff,length+2,0); + memcpy(info->buff+2, key_ptr, length); + info->buff_used=info->page_changed=1; /* info->buff is used */ + if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + + /* inserting the rest of key values */ + end= (uchar*) dynamic_array_ptr(da, da->elements); + for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) + if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) + DBUG_RETURN(-1); + + /* now, writing the word key entry */ + ft_intXstore(key+key_length, - (int) da->elements); + _ma_dpointer(info, key+key_length+HA_FT_WLEN, root); + + DBUG_RETURN(_ma_ck_real_write_btree(info, + info->s->keyinfo+keynr, + key, 0, + &info->s->state.key_root[keynr], + SEARCH_SAME)); +} diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h new file mode 100644 index 00000000000..41248d1bc9c --- /dev/null +++ b/storage/maria/ma_ftdefs.h @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "ma_fulltext.h" +#include +#include +#include +#include + +#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') +#define misc_word_char(X) 0 +#define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) + +#define FT_MAX_WORD_LEN_FOR_SORT 31 + +#define COMPILE_STOPWORDS_IN + +/* Interested readers may consult SMART + (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z) + for an excellent implementation of vector space model we use. + It also demonstrate the usage of different weghting techniques. + This code, though, is completely original and is not based on the + SMART code but was in some cases inspired by it. + + NORM_PIVOT was taken from the article + A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization", + ACM SIGIR'96, 21-29, 1996 + */ + +#define LWS_FOR_QUERY LWS_TF +#define LWS_IN_USE LWS_LOG +#define PRENORM_IN_USE PRENORM_AVG +#define NORM_IN_USE NORM_PIVOT +#define GWS_IN_USE GWS_PROB +/*==============================================================*/ +#define LWS_TF (count) +#define LWS_BINARY (count>0) +#define LWS_SQUARE (count*count) +#define LWS_LOG (count?(log( (double) count)+1):0) +/*--------------------------------------------------------------*/ +#define PRENORM_NONE (p->weight) +#define PRENORM_MAX (p->weight/docstat.max) +#define PRENORM_AUG (0.4+0.6*p->weight/docstat.max) +#define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq) +#define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq))) +/*--------------------------------------------------------------*/ +#define NORM_NONE (1) +#define NORM_SUM (docstat.nsum) +#define NORM_COS (sqrt(docstat.nsum2)) + +#define PIVOT_VAL (0.0115) +#define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq) +/*---------------------------------------------------------------*/ +#define GWS_NORM (1/sqrt(sum2)) +#define GWS_GFIDF (sum/doc_cnt) +/* Mysterious, but w/o (double) GWS_IDF performs better :-o */ +#define GWS_IDF log(aio->info->state->records/doc_cnt) +#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt) +#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 ) +#define GWS_FREQ (1.0/doc_cnt) +#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2) +#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3) +#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records)) +/*=================================================================*/ + +/* Boolean search operators */ +#define FTB_YES (ft_boolean_syntax[0]) +#define FTB_EGAL (ft_boolean_syntax[1]) +#define FTB_NO (ft_boolean_syntax[2]) +#define FTB_INC (ft_boolean_syntax[3]) +#define FTB_DEC (ft_boolean_syntax[4]) +#define FTB_LBR (ft_boolean_syntax[5]) +#define FTB_RBR (ft_boolean_syntax[6]) +#define FTB_NEG (ft_boolean_syntax[7]) +#define FTB_TRUNC (ft_boolean_syntax[8]) +#define FTB_LQUOT (ft_boolean_syntax[10]) +#define FTB_RQUOT (ft_boolean_syntax[11]) + +typedef struct st_maria_ft_word { + byte * pos; + uint len; + double weight; +} FT_WORD; + +int is_stopword(char *word, uint len); + +uint _ma_ft_make_key(MARIA_HA *, uint , byte *, FT_WORD *, my_off_t); + +byte maria_ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, + MYSQL_FTPARSER_BOOLEAN_INFO *); +byte maria_ft_simple_get_word(CHARSET_INFO *, byte **, const byte *, + FT_WORD *, my_bool); + +typedef struct _st_maria_ft_seg_iterator { + uint num, len; + HA_KEYSEG *seg; + const byte *rec, *pos; +} FT_SEG_ITERATOR; + +void _ma_ft_segiterator_init(MARIA_HA *, uint, const byte *, FT_SEG_ITERATOR *); +void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); +uint _ma_ft_segiterator(FT_SEG_ITERATOR *); + +void maria_ft_parse_init(TREE *, CHARSET_INFO *); +int maria_ft_parse(TREE *, byte *, int, my_bool, struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *param); +FT_WORD * maria_ft_linearize(TREE *); +FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *); +uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *, my_bool, + MYSQL_FTPARSER_PARAM *param); + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *); +FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *); + +extern const struct _ft_vft _ma_ft_vft_nlq; +int maria_ft_nlq_read_next(FT_INFO *, char *); +float maria_ft_nlq_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_nlq_close_search(FT_INFO *); +float maria_ft_nlq_get_relevance(FT_INFO *); +my_off_t maria_ft_nlq_get_docid(FT_INFO *); +void maria_ft_nlq_reinit_search(FT_INFO *); + +extern const struct _ft_vft _ma_ft_vft_boolean; +int maria_ft_boolean_read_next(FT_INFO *, char *); +float maria_ft_boolean_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_boolean_close_search(FT_INFO *); +float maria_ft_boolean_get_relevance(FT_INFO *); +my_off_t maria_ft_boolean_get_docid(FT_INFO *); +void maria_ft_boolean_reinit_search(FT_INFO *); +extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr); +extern void maria_ftparser_call_deinitializer(MARIA_HA *info); diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h new file mode 100644 index 00000000000..946a5628175 --- /dev/null +++ b/storage/maria/ma_fulltext.h @@ -0,0 +1,28 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "maria_def.h" +#include "ft_global.h" + +int _ma_ft_cmp(MARIA_HA *, uint, const byte *, const byte *); +int _ma_ft_add(MARIA_HA *, uint, byte *, const byte *, my_off_t); +int _ma_ft_del(MARIA_HA *, uint, byte *, const byte *, my_off_t); + +uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, uchar *); diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c new file mode 100644 index 00000000000..b22ffa41833 --- /dev/null +++ b/storage/maria/ma_info.c @@ -0,0 +1,133 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Return useful base information for an open table */ + +#include "maria_def.h" +#ifdef __WIN__ +#include +#endif + + /* Get position to last record */ + +my_off_t maria_position(MARIA_HA *info) +{ + return info->lastpos; +} + + +/* Get information about the table */ +/* if flag == 2 one get current info (no sync from database */ + +int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) +{ + MY_STAT state; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_status"); + + x->recpos = info->lastpos; + if (flag == HA_STATUS_POS) + DBUG_RETURN(0); /* Compatible with ISAM */ + if (!(flag & HA_STATUS_NO_LOCK)) + { + pthread_mutex_lock(&share->intern_lock); + VOID(_ma_readinfo(info,F_RDLCK,0)); + fast_ma_writeinfo(info); + pthread_mutex_unlock(&share->intern_lock); + } + if (flag & HA_STATUS_VARIABLE) + { + x->records = info->state->records; + x->deleted = info->state->del; + x->delete_length = info->state->empty; + x->data_file_length =info->state->data_file_length; + x->index_file_length=info->state->key_file_length; + + x->keys = share->state.header.keys; + x->check_time = share->state.check_time; + x->mean_reclength = info->state->records ? + (ulong) ((info->state->data_file_length-info->state->empty)/ + info->state->records) : (ulong) share->min_pack_length; + } + if (flag & HA_STATUS_ERRKEY) + { + x->errkey = info->errkey; + x->dupp_key_pos= info->dupp_key_pos; + } + if (flag & HA_STATUS_CONST) + { + x->reclength = share->base.reclength; + x->max_data_file_length=share->base.max_data_file_length; + x->max_index_file_length=info->s->base.max_key_file_length; + x->filenr = info->dfile; + x->options = share->options; + x->create_time=share->state.create_time; + x->reflength= maria_get_pointer_length(share->base.max_data_file_length, + maria_data_pointer_size); + x->record_offset= ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? + 0L : share->base.pack_reclength); + x->sortkey= -1; /* No clustering */ + x->rec_per_key = share->state.rec_per_key_part; + x->key_map = share->state.key_map; + x->data_file_name = share->data_file_name; + x->index_file_name = share->index_file_name; + } + if ((flag & HA_STATUS_TIME) && !my_fstat(info->dfile,&state,MYF(0))) + x->update_time=state.st_mtime; + else + x->update_time=0; + if (flag & HA_STATUS_AUTO) + { + x->auto_increment= share->state.auto_increment+1; + if (!x->auto_increment) /* This shouldn't happen */ + x->auto_increment= ~(ulonglong) 0; + } + DBUG_RETURN(0); +} + + +/* + Write a message to the error log. + + SYNOPSIS + _ma_report_error() + file_name Name of table file (e.g. index_file_name). + errcode Error number. + + DESCRIPTION + This function supplies my_error() with a table name. Most error + messages need one. Since string arguments in error messages are limited + to 64 characters by convention, we ensure that in case of truncation, + that the end of the index file path is in the message. This contains + the most valuable information (the table name and the database name). + + RETURN + void +*/ + +void _ma_report_error(int errcode, const char *file_name) +{ + size_t lgt; + DBUG_ENTER("_ma_report_error"); + DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); + + if ((lgt= strlen(file_name)) > 64) + file_name+= lgt - 64; + my_error(errcode, MYF(ME_NOREFRESH), file_name); + DBUG_VOID_RETURN; +} + diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c new file mode 100644 index 00000000000..fc526c6ca3a --- /dev/null +++ b/storage/maria/ma_init.c @@ -0,0 +1,59 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Initialize an maria-database */ + +#include "maria_def.h" +#include + +static int maria_inited= 0; +pthread_mutex_t THR_LOCK_maria; + +/* + Initialize maria + + SYNOPSIS + maria_init() + + TODO + Open log files and do recovery if need + + RETURN + 0 ok + # error number +*/ + +int maria_init(void) +{ + if (!maria_inited) + { + maria_inited= 1; + pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); + } + return 0; +} + + +void maria_end(void) +{ + if (maria_inited) + { + maria_inited= 0; + VOID(maria_logging(0)); /* Close log if neaded */ + ft_free_stopwords(); + pthread_mutex_destroy(&THR_LOCK_maria); + } +} diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c new file mode 100644 index 00000000000..6a8c647aa7f --- /dev/null +++ b/storage/maria/ma_key.c @@ -0,0 +1,592 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to handle keys */ + +#include "maria_def.h" +#include "m_ctype.h" +#include "ma_sp_defs.h" +#ifdef HAVE_IEEEFP_H +#include +#endif + +#define CHECK_KEYS /* Enable safety checks */ + +#define FIX_LENGTH(cs, pos, length, char_length) \ + do { \ + if (length > char_length) \ + char_length= my_charpos(cs, pos, pos+length, char_length); \ + set_if_smaller(char_length,length); \ + } while(0) + +static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record); + +/* + Make a intern key from a record + + SYNOPSIS + _ma_make_key() + info MyiSAM handler + keynr key number + key Store created key here + record Record + filepos Position to record in the data file + + RETURN + Length of key +*/ + +uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos) +{ + byte *pos,*end; + uchar *start; + reg1 HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_make_key"); + + if (info->s->keyinfo[keynr].flag & HA_SPATIAL) + { + /* + TODO: nulls processing + */ +#ifdef HAVE_SPATIAL + DBUG_RETURN(sp_make_key(info,keynr,key,record,filepos)); +#else + DBUG_ASSERT(0); /* maria_open should check that this never happens*/ +#endif + } + + start=key; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + uint char_length; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + *key++= 0; /* NULL in key */ + continue; + } + *key++=1; /* Not NULL */ + } + + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : + length); + + pos= (byte*) record+keyseg->start; + if (type == HA_KEYTYPE_BIT) + { + if (keyseg->bit_length) + { + uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos, + keyseg->bit_start, keyseg->bit_length); + *key++= bits; + length--; + } + memcpy((byte*) key, pos, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + end= pos + length; + if (type != HA_KEYTYPE_NUM) + { + while (end > pos && end[-1] == ' ') + end--; + } + else + { + while (pos < end && pos[0] == ' ') + pos++; + } + length=(uint) (end-pos); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy((byte*) key,(byte*) pos,(size_t) char_length); + key+=char_length; + continue; + } + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy((byte*) key,(byte*) pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy((byte*) key,(byte*) pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ +#ifdef HAVE_ISNAN + if (type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr,pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key,length); + key+=length; + continue; + } + } + else if (type == HA_KEYTYPE_DOUBLE) + { + double nr; + float8get(nr,pos); + if (isnan(nr)) + { + bzero(key,length); + key+=length; + continue; + } + } +#endif + pos+=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy((byte*) key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + } + _ma_dpointer(info,key,filepos); + DBUG_PRINT("exit",("keynr: %d",keynr)); + DBUG_DUMP("key",(byte*) start,(uint) (key-start)+keyseg->length); + DBUG_EXECUTE("key", + _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start, + (uint) (key-start));); + DBUG_RETURN((uint) (key-start)); /* Return keylength */ +} /* _ma_make_key */ + + +/* + Pack a key to intern format from given format (c_rkey) + + SYNOPSIS + _ma_pack_key() + info MARIA handler + uint keynr key number + key Store packed key here + old Not packed key + k_length Length of 'old' to use + last_used_keyseg out parameter. May be NULL + + RETURN + length of packed key + + last_use_keyseg Store pointer to the keyseg after the last used one +*/ + +uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, + uint k_length, HA_KEYSEG **last_used_keyseg) +{ + uchar *start_key=key; + HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_pack_key"); + + for (keyseg=info->s->keyinfo[keynr].seg ; + keyseg->type && (int) k_length > 0; + old+=keyseg->length, keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=min((uint) keyseg->length,(uint) k_length); + uint char_length; + uchar *pos; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + k_length--; + if (!(*key++= (char) 1-*old++)) /* Copy null marker */ + { + k_length-=length; + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + k_length-=2; /* Skip length */ + old+= 2; + } + continue; /* Found NULL */ + } + } + char_length= (!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : length; + pos=old; + if (keyseg->flag & HA_SPACE_PACK) + { + uchar *end=pos+length; + if (type != HA_KEYTYPE_NUM) + { + while (end > pos && end[-1] == ' ') + end--; + } + else + { + while (pos < end && pos[0] == ' ') + pos++; + } + k_length-=length; + length=(uint) (end-pos); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy((byte*) key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + /* Length of key-part used with maria_rkey() always 2 */ + uint tmp_length=uint2korr(pos); + k_length-= 2+length; + pos+=2; + set_if_smaller(length,tmp_length); /* Safety */ + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + old+=2; /* Skip length */ + memcpy((byte*) key, pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ + pos+=length; + k_length-=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy((byte*) key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + k_length-=length; + } + if (last_used_keyseg) + *last_used_keyseg= keyseg; + +#ifdef NOT_USED + if (keyseg->type) + { + /* Part-key ; fill with ASCII 0 for easier searching */ + length= (uint) -k_length; /* unused part of last key */ + do + { + if (keyseg->flag & HA_NULL_PART) + length++; + if (keyseg->flag & HA_SPACE_PACK) + length+=2; + else + length+= keyseg->length; + keyseg++; + } while (keyseg->type); + bzero((byte*) key,length); + key+=length; + } +#endif + DBUG_RETURN((uint) (key-start_key)); +} /* _ma_pack_key */ + + + +/* + Store found key in record + + SYNOPSIS + _ma_put_key_in_record() + info MARIA handler + keynr Key number that was used + record Store key here + + Last read key is in info->lastkey + + NOTES + Used when only-keyread is wanted + + RETURN + 0 ok + 1 error +*/ + +static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, + byte *record) +{ + reg2 byte *key; + byte *pos,*key_end; + reg1 HA_KEYSEG *keyseg; + byte *blob_ptr; + DBUG_ENTER("_ma_put_key_in_record"); + + blob_ptr= (byte*) info->lastkey2; /* Place to put blob parts */ + key=(byte*) info->lastkey; /* KEy that was read */ + key_end=key+info->lastkey_length; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + if (keyseg->null_bit) + { + if (!*key++) + { + record[keyseg->null_pos]|= keyseg->null_bit; + continue; + } + record[keyseg->null_pos]&= ~keyseg->null_bit; + } + if (keyseg->type == HA_KEYTYPE_BIT) + { + uint length= keyseg->length; + + if (keyseg->bit_length) + { + uchar bits= *key++; + set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + length--; + } + else + { + clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + } + memcpy(record + keyseg->start, (byte*) key, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + pos= record+keyseg->start; + if (keyseg->type != (int) HA_KEYTYPE_NUM) + { + memcpy(pos,key,(size_t) length); + bfill(pos+length,keyseg->length-length,' '); + } + else + { + bfill(pos,keyseg->length-length,' '); + memcpy(pos+keyseg->length-length,key,(size_t) length); + } + key+=length; + continue; + } + + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + /* Store key length */ + if (keyseg->bit_start == 1) + *(uchar*) (record+keyseg->start)= (uchar) length; + else + int2store(record+keyseg->start, length); + /* And key data */ + memcpy(record+keyseg->start + keyseg->bit_start, (byte*) key, length); + key+= length; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + memcpy(record+keyseg->start+keyseg->bit_start, + (char*) &blob_ptr,sizeof(char*)); + memcpy(blob_ptr,key,length); + blob_ptr+=length; + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + _ma_store_blob_length(record+keyseg->start, + (uint) keyseg->bit_start,length); + key+=length; + } + else if (keyseg->flag & HA_SWAP_KEY) + { + byte *to= record+keyseg->start+keyseg->length; + byte *end= key+keyseg->length; +#ifdef CHECK_KEYS + if (end > key_end) + goto err; +#endif + do + { + *--to= *key++; + } while (key != end); + continue; + } + else + { +#ifdef CHECK_KEYS + if (key+keyseg->length > key_end) + goto err; +#endif + memcpy(record+keyseg->start,(byte*) key, + (size_t) keyseg->length); + key+= keyseg->length; + } + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); /* Crashed row */ +} /* _ma_put_key_in_record */ + + + /* Here when key reads are used */ + +int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, byte *buf) +{ + fast_ma_writeinfo(info); + if (filepos != HA_OFFSET_ERROR) + { + if (info->lastinx >= 0) + { /* Read only key */ + if (_ma_put_key_in_record(info,(uint) info->lastinx,buf)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return -1; + } + info->update|= HA_STATE_AKTIV; /* We should find a record */ + return 0; + } + my_errno=HA_ERR_WRONG_INDEX; + } + return(-1); /* Wrong data to read */ +} + + +/* + Update auto_increment info + + SYNOPSIS + _ma_update_auto_increment() + info MARIA handler + record Row to update + + IMPLEMENTATION + Only replace the auto_increment value if it is higher than the previous + one. For signed columns we don't update the auto increment value if it's + less than zero. +*/ + +void _ma_update_auto_increment(MARIA_HA *info,const byte *record) +{ + ulonglong value= 0; /* Store unsigned values here */ + longlong s_value= 0; /* Store signed values here */ + HA_KEYSEG *keyseg= info->s->keyinfo[info->s->base.auto_key-1].seg; + const uchar *key= (uchar*) record + keyseg->start; + + switch (keyseg->type) { + case HA_KEYTYPE_INT8: + s_value= (longlong) *(char*)key; + break; + case HA_KEYTYPE_BINARY: + value=(ulonglong) *(uchar*) key; + break; + case HA_KEYTYPE_SHORT_INT: + s_value= (longlong) sint2korr(key); + break; + case HA_KEYTYPE_USHORT_INT: + value=(ulonglong) uint2korr(key); + break; + case HA_KEYTYPE_LONG_INT: + s_value= (longlong) sint4korr(key); + break; + case HA_KEYTYPE_ULONG_INT: + value=(ulonglong) uint4korr(key); + break; + case HA_KEYTYPE_INT24: + s_value= (longlong) sint3korr(key); + break; + case HA_KEYTYPE_UINT24: + value=(ulonglong) uint3korr(key); + break; + case HA_KEYTYPE_FLOAT: /* This shouldn't be used */ + { + float f_1; + float4get(f_1,key); + /* Ignore negative values */ + value = (f_1 < (float) 0.0) ? 0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */ + { + double f_1; + float8get(f_1,key); + /* Ignore negative values */ + value = (f_1 < 0.0) ? 0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_LONGLONG: + s_value= sint8korr(key); + break; + case HA_KEYTYPE_ULONGLONG: + value= uint8korr(key); + break; + default: + DBUG_ASSERT(0); + value=0; /* Error */ + break; + } + + /* + The following code works becasue if s_value < 0 then value is 0 + and if s_value == 0 then value will contain either s_value or the + correct value. + */ + set_if_bigger(info->s->state.auto_increment, + (s_value > 0) ? (ulonglong) s_value : value); +} diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c new file mode 100644 index 00000000000..837b0fbac66 --- /dev/null +++ b/storage/maria/ma_keycache.c @@ -0,0 +1,163 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Key cache assignments +*/ + +#include "maria_def.h" + +/* + Assign pages of the index file for a table to a key cache + + SYNOPSIS + maria_assign_to_key_cache() + info open table + key_map map of indexes to assign to the key cache + key_cache_ptr pointer to the key cache handle + assign_lock Mutex to lock during assignment + + PREREQUESTS + One must have a READ lock or a WRITE lock on the table when calling + the function to ensure that there is no other writers to it. + + The caller must also ensure that one doesn't call this function from + two different threads with the same table. + + NOTES + At present pages for all indexes must be assigned to the same key cache. + In future only pages for indexes specified in the key_map parameter + of the table will be assigned to the specified key cache. + + RETURN VALUE + 0 If a success + # Error code +*/ + +int maria_assign_to_key_cache(MARIA_HA *info, + ulonglong key_map __attribute__((unused)), + KEY_CACHE *key_cache) +{ + int error= 0; + MARIA_SHARE* share= info->s; + DBUG_ENTER("maria_assign_to_key_cache"); + DBUG_PRINT("enter",("old_key_cache_handle: %lx new_key_cache_handle: %lx", + share->key_cache, key_cache)); + + /* + Skip operation if we didn't change key cache. This can happen if we + call this for all open instances of the same table + */ + if (share->key_cache == key_cache) + DBUG_RETURN(0); + + /* + First flush all blocks for the table in the old key cache. + This is to ensure that the disk is consistent with the data pages + in memory (which may not be the case if the table uses delayed_key_write) + + Note that some other read thread may still fill in the key cache with + new blocks during this call and after, but this doesn't matter as + all threads will start using the new key cache for their next call to + maria library and we know that there will not be any changed blocks + in the old key cache. + */ + + if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Mark that table must be checked */ + } + + /* + Flush the new key cache for this file. This is needed to ensure + that there is no old blocks (with outdated data) left in the new key + cache from an earlier assign_to_keycache operation + + (This can never fail as there is never any not written data in the + new key cache) + */ + (void) flush_key_blocks(key_cache, share->kfile, FLUSH_RELEASE); + + /* + ensure that setting the key cache and changing the multi_key_cache + is done atomicly + */ + pthread_mutex_lock(&share->intern_lock); + /* + Tell all threads to use the new key cache + This should be seen at the lastes for the next call to an maria function. + */ + share->key_cache= key_cache; + + /* store the key cache in the global hash structure for future opens */ + if (multi_key_cache_set(share->unique_file_name, share->unique_name_length, + share->key_cache)) + error= my_errno; + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} + + +/* + Change all MARIA entries that uses one key cache to another key cache + + SYNOPSIS + maria_change_key_cache() + old_key_cache Old key cache + new_key_cache New key cache + + NOTES + This is used when we delete one key cache. + + To handle the case where some other threads tries to open an MARIA + table associated with the to-be-deleted key cache while this operation + is running, we have to call 'multi_key_cache_change()' from this + function while we have a lock on the MARIA table list structure. + + This is safe as long as it's only MARIA that is using this specific + key cache. +*/ + + +void maria_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache) +{ + LIST *pos; + DBUG_ENTER("maria_change_key_cache"); + + /* + Lock list to ensure that no one can close the table while we manipulate it + */ + pthread_mutex_lock(&THR_LOCK_maria); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info= (MARIA_HA*) pos->data; + MARIA_SHARE *share= info->s; + if (share->key_cache == old_key_cache) + maria_assign_to_key_cache(info, (ulonglong) ~0, new_key_cache); + } + + /* + We have to do the following call while we have the lock on the + MARIA list structure to ensure that another thread is not trying to + open a new table that will be associted with the old key cache + */ + multi_key_cache_change(old_key_cache, new_key_cache); + pthread_mutex_unlock(&THR_LOCK_maria); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c new file mode 100644 index 00000000000..697747021f8 --- /dev/null +++ b/storage/maria/ma_locking.c @@ -0,0 +1,554 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + locking of isam-tables. + reads info from a isam-table. Must be first request before doing any furter + calls to any isamfunktion. Is used to allow many process use the same + isamdatabase. +*/ + +#include "ma_ftdefs.h" + + /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */ + +int maria_lock_database(MARIA_HA *info, int lock_type) +{ + int error; + uint count; + MARIA_SHARE *share=info->s; + uint flag; + DBUG_ENTER("maria_lock_database"); + DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " + "global_changed: %d open_count: %u name: '%s'", + lock_type, info->lock_type, share->r_locks, + share->w_locks, + share->global_changed, share->state.open_count, + share->index_file_name)); + if (share->options & HA_OPTION_READ_ONLY_DATA || + info->lock_type == lock_type) + DBUG_RETURN(0); + if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */ + { + ++share->w_locks; + ++share->tot_locks; + info->lock_type= lock_type; + DBUG_RETURN(0); + } + + flag=error=0; + pthread_mutex_lock(&share->intern_lock); + if (share->kfile >= 0) /* May only be false on windows */ + { + switch (lock_type) { + case F_UNLCK: + maria_ftparser_call_deinitializer(info); + if (info->lock_type == F_RDLCK) + count= --share->r_locks; + else + count= --share->w_locks; + --share->tot_locks; + if (info->lock_type == F_WRLCK && !share->w_locks && + !share->delay_key_write && flush_key_blocks(share->key_cache, + share->kfile,FLUSH_KEEP)) + { + error=my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Mark that table must be checked */ + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + { + error=my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + if (!count) + { + DBUG_PRINT("info",("changed: %u w_locks: %u", + (uint) share->changed, share->w_locks)); + if (share->changed && !share->w_locks) + { +#ifdef HAVE_MMAP + if ((info->s->mmaped_length != info->s->state.state.data_file_length) && + (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) + { + if (info->s->concurrent_insert) + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, info->s->state.state.data_file_length); + info->s->nonmmaped_inserts= 0; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + } +#endif + share->state.process= share->last_process=share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if (_ma_state_info_write(share->kfile, &share->state, 1)) + error=my_errno; + share->changed=0; + if (maria_flush) + { + if (my_sync(share->kfile, MYF(0))) + error= my_errno; + if (my_sync(info->dfile, MYF(0))) + error= my_errno; + } + else + share->not_flushed=1; + if (error) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + if (info->lock_type != F_EXTRA_LCK) + { + if (share->r_locks) + { /* Only read locks left */ + flag=1; + if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, + MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) + error=my_errno; + } + else if (!share->w_locks) + { /* No more locks */ + flag=1; + if (my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, + MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) + error=my_errno; + } + } + } + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + info->lock_type= F_UNLCK; + break; + case F_RDLCK: + if (info->lock_type == F_WRLCK) + { + /* + Change RW to READONLY + + mysqld does not turn write locks to read locks, + so we're never here in mysqld. + */ + if (share->w_locks == 1) + { + flag=1; + if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, + MYF(MY_SEEK_NOT_DONE))) + { + error=my_errno; + break; + } + } + share->w_locks--; + share->r_locks++; + info->lock_type=lock_type; + break; + } + if (!share->r_locks && !share->w_locks) + { + flag=1; + if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, + info->lock_wait | MY_SEEK_NOT_DONE)) + { + error=my_errno; + break; + } + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + error=my_errno; + VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE))); + my_errno=error; + break; + } + } + VOID(_ma_test_if_changed(info)); + share->r_locks++; + share->tot_locks++; + info->lock_type=lock_type; + break; + case F_WRLCK: + if (info->lock_type == F_RDLCK) + { /* Change READONLY to RW */ + if (share->r_locks == 1) + { + flag=1; + if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, + MYF(info->lock_wait | MY_SEEK_NOT_DONE))) + { + error=my_errno; + break; + } + share->r_locks--; + share->w_locks++; + info->lock_type=lock_type; + break; + } + } + if (!(share->options & HA_OPTION_READ_ONLY_DATA)) + { + if (!share->w_locks) + { + flag=1; + if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, + info->lock_wait | MY_SEEK_NOT_DONE)) + { + error=my_errno; + break; + } + if (!share->r_locks) + { + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + error=my_errno; + VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, + info->lock_wait | MY_SEEK_NOT_DONE)); + my_errno=error; + break; + } + } + } + } + VOID(_ma_test_if_changed(info)); + + info->lock_type=lock_type; + info->invalidator=info->s->invalidator; + share->w_locks++; + share->tot_locks++; + break; + default: + break; /* Impossible */ + } + } + pthread_mutex_unlock(&share->intern_lock); +#if defined(FULL_LOG) || defined(_lint) + lock_type|=(int) (flag << 8); /* Set bit to set if real lock */ + maria_log_command(MARIA_LOG_LOCK,info,(byte*) &lock_type,sizeof(lock_type), + error); +#endif + DBUG_RETURN(error); +} /* maria_lock_database */ + + +/**************************************************************************** + The following functions are called by thr_lock() in threaded applications +****************************************************************************/ + +/* + Create a copy of the current status for the table + + SYNOPSIS + _ma_get_status() + param Pointer to Myisam handler + concurrent_insert Set to 1 if we are going to do concurrent inserts + (THR_WRITE_CONCURRENT_INSERT was used) +*/ + +void _ma_get_status(void* param, int concurrent_insert) +{ + MARIA_HA *info=(MARIA_HA*) param; + DBUG_ENTER("_ma_get_status"); + DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length, + concurrent_insert)); +#ifndef DBUG_OFF + if (info->state->key_file_length > info->s->state.state.key_file_length || + info->state->data_file_length > info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); +#endif + info->save_state=info->s->state.state; + info->state= &info->save_state; + info->append_insert_at_end= concurrent_insert; + DBUG_VOID_RETURN; +} + + +void _ma_update_status(void* param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + Because someone may have closed the table we point at, we only + update the state if its our own state. This isn't a problem as + we are always pointing at our own lock or at a read lock. + (This is enforced by thr_multi_lock.c) + */ + if (info->state == &info->save_state) + { +#ifndef DBUG_OFF + DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); + if (info->state->key_file_length < info->s->state.state.key_file_length || + info->state->data_file_length < info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length)); +#endif + info->s->state.state= *info->state; + info->state= &info->s->state.state; + } + info->append_insert_at_end= 0; + + /* + We have to flush the write cache here as other threads may start + reading the table before maria_lock_database() is called + */ + if (info->opt_flag & WRITE_CACHE_USED) + { + if (end_io_cache(&info->rec_cache)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->opt_flag&= ~WRITE_CACHE_USED; + } +} + +void _ma_copy_status(void* to,void *from) +{ + ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state; +} + + +/* + Check if should allow concurrent inserts + + IMPLEMENTATION + Allow concurrent inserts if we don't have a hole in the table or + if there is no active write lock and there is active read locks and + maria_concurrent_insert == 2. In this last case the new + row('s) are inserted at end of file instead of filling up the hole. + + The last case is to allow one to inserts into a heavily read-used table + even if there is holes. + + NOTES + If there is a an rtree indexes in the table, concurrent inserts are + disabled in maria_open() + + RETURN + 0 ok to use concurrent inserts + 1 not ok +*/ + +my_bool _ma_check_status(void *param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + The test for w_locks == 1 is here because this thread has already done an + external lock (in other words: w_locks == 1 means no other threads has + a write lock) + */ + DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u", + (long) info->s->state.dellink, (uint) info->s->r_locks, + (uint) info->s->w_locks)); + return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR || + (maria_concurrent_insert == 2 && info->s->r_locks && + info->s->w_locks == 1)); +} + + +/**************************************************************************** + ** functions to read / write the state +****************************************************************************/ + +int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer) +{ + DBUG_ENTER("_ma_readinfo"); + + if (info->lock_type == F_UNLCK) + { + MARIA_SHARE *share=info->s; + if (!share->tot_locks) + { + if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, + info->lock_wait | MY_SEEK_NOT_DONE)) + DBUG_RETURN(1); + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + int error=my_errno ? my_errno : -1; + VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, + MYF(MY_SEEK_NOT_DONE))); + my_errno=error; + DBUG_RETURN(1); + } + } + if (check_keybuffer) + VOID(_ma_test_if_changed(info)); + info->invalidator=info->s->invalidator; + } + else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK) + { + my_errno=EACCES; /* Not allowed to change */ + DBUG_RETURN(-1); /* when have read_lock() */ + } + DBUG_RETURN(0); +} /* _ma_readinfo */ + + +/* + Every isam-function that uppdates the isam-database MUST end with this + request +*/ + +int _ma_writeinfo(register MARIA_HA *info, uint operation) +{ + int error,olderror; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_writeinfo"); + DBUG_PRINT("info",("operation: %u tot_locks: %u", operation, + share->tot_locks)); + + error=0; + if (share->tot_locks == 0) + { + olderror=my_errno; /* Remember last error */ + if (operation) + { /* Two threads can't be here */ + share->state.process= share->last_process= share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if ((error=_ma_state_info_write(share->kfile, &share->state, 1))) + olderror=my_errno; +#ifdef __WIN__ + if (maria_flush) + { + _commit(share->kfile); + _commit(info->dfile); + } +#endif + } + if (!(operation & WRITEINFO_NO_UNLOCK) && + my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, + MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) + DBUG_RETURN(1); + my_errno=olderror; + } + else if (operation) + share->changed= 1; /* Mark keyfile changed */ + DBUG_RETURN(error); +} /* _ma_writeinfo */ + + + /* Test if someone has changed the database */ + /* (Should be called after readinfo) */ + +int _ma_test_if_changed(register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + if (share->state.process != share->last_process || + share->state.unique != info->last_unique || + share->state.update_count != info->last_loop) + { /* Keyfile has changed */ + DBUG_PRINT("info",("index file changed")); + if (share->state.process != share->this_process) + VOID(flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)); + share->last_process=share->state.process; + info->last_unique= share->state.unique; + info->last_loop= share->state.update_count; + info->update|= HA_STATE_WRITTEN; /* Must use file on next */ + info->data_changed= 1; /* For maria_is_changed */ + return 1; + } + return (!(info->update & HA_STATE_AKTIV) || + (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED | + HA_STATE_KEY_CHANGED))); +} /* _ma_test_if_changed */ + + +/* + Put a mark in the .MYI file that someone is updating the table + + + DOCUMENTATION + + state.open_count in the .MYI file is used the following way: + - For the first change of the .MYI file in this process open_count is + incremented by maria_mark_file_change(). (We have a write lock on the file + when this happens) + - In maria_close() it's decremented by _ma_decrement_open_count() if it + was incremented in the same process. + + This mean that if we are the only process using the file, the open_count + tells us if the MARIA file wasn't properly closed. (This is true if + my_disable_locking is set). +*/ + + +int _ma_mark_file_changed(MARIA_HA *info) +{ + char buff[3]; + register MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_mark_file_changed"); + + if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed) + { + share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | + STATE_NOT_OPTIMIZED_KEYS); + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + if (!share->temporary) + { + mi_int2store(buff,share->state.open_count); + buff[2]=1; /* Mark that it's changed */ + DBUG_RETURN(my_pwrite(share->kfile,buff,sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP))); + } + } + DBUG_RETURN(0); +} + + +/* + This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite() + call. In these context the following code should be safe! + */ + +int _ma_decrement_open_count(MARIA_HA *info) +{ + char buff[2]; + register MARIA_SHARE *share=info->s; + int lock_error=0,write_error=0; + if (share->global_changed) + { + uint old_lock=info->lock_type; + share->global_changed=0; + lock_error=maria_lock_database(info,F_WRLCK); + /* Its not fatal even if we couldn't get the lock ! */ + if (share->state.open_count > 0) + { + share->state.open_count--; + mi_int2store(buff,share->state.open_count); + write_error=my_pwrite(share->kfile,buff,sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP)); + } + if (!lock_error) + lock_error=maria_lock_database(info,old_lock); + } + return test(lock_error || write_error); +} diff --git a/storage/maria/ma_log.c b/storage/maria/ma_log.c new file mode 100644 index 00000000000..7c32c1068cb --- /dev/null +++ b/storage/maria/ma_log.c @@ -0,0 +1,164 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Logging of MARIA commands and records on logfile for debugging + The log can be examined with help of the marialog command. +*/ + +#include "maria_def.h" +#if defined(MSDOS) || defined(__WIN__) +#include +#ifndef __WIN__ +#include +#endif +#endif +#ifdef VMS +#include +#endif + +#undef GETPID /* For HPUX */ +#ifdef THREAD +#define GETPID() (log_type == 1 ? (long) maria_pid : (long) my_thread_id()); +#else +#define GETPID() maria_pid +#endif + + /* Activate logging if flag is 1 and reset logging if flag is 0 */ + +static int log_type=0; +ulong maria_pid=0; + +int maria_logging(int activate_log) +{ + int error=0; + char buff[FN_REFLEN]; + DBUG_ENTER("maria_logging"); + + log_type=activate_log; + if (activate_log) + { + if (!maria_pid) + maria_pid=(ulong) getpid(); + if (maria_log_file < 0) + { + if ((maria_log_file = my_create(fn_format(buff,maria_log_filename, + "",".log",4), + 0,(O_RDWR | O_BINARY | O_APPEND),MYF(0))) + < 0) + DBUG_RETURN(my_errno); + } + } + else if (maria_log_file >= 0) + { + error=my_close(maria_log_file,MYF(0)) ? my_errno : 0 ; + maria_log_file= -1; + } + DBUG_RETURN(error); +} + + + /* Logging of records and commands on logfile */ + /* All logs starts with command(1) dfile(2) process(4) result(2) */ + +void _ma_log(enum maria_log_commands command, MARIA_HA *info, + const byte *buffert, uint length) +{ + char buff[11]; + int error,old_errno; + ulong pid=(ulong) GETPID(); + old_errno=my_errno; + bzero(buff,sizeof(buff)); + buff[0]=(char) command; + mi_int2store(buff+1,info->dfile); + mi_int4store(buff+3,pid); + mi_int2store(buff+9,length); + + pthread_mutex_lock(&THR_LOCK_maria); + error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); + VOID(my_write(maria_log_file,buffert,length,MYF(0))); + if (!error) + error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno=old_errno; +} + + +void _ma_log_command(enum maria_log_commands command, MARIA_HA *info, + const byte *buffert, uint length, int result) +{ + char buff[9]; + int error,old_errno; + ulong pid=(ulong) GETPID(); + + old_errno=my_errno; + buff[0]=(char) command; + mi_int2store(buff+1,info->dfile); + mi_int4store(buff+3,pid); + mi_int2store(buff+7,result); + pthread_mutex_lock(&THR_LOCK_maria); + error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); + if (buffert) + VOID(my_write(maria_log_file,buffert,length,MYF(0))); + if (!error) + error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno=old_errno; +} + + +void _ma_log_record(enum maria_log_commands command, MARIA_HA *info, + const byte *record, my_off_t filepos, int result) +{ + char buff[21],*pos; + int error,old_errno; + uint length; + ulong pid=(ulong) GETPID(); + + old_errno=my_errno; + if (!info->s->base.blobs) + length=info->s->base.reclength; + else + length=info->s->base.reclength+ _ma_calc_total_blob_length(info,record); + buff[0]=(char) command; + mi_int2store(buff+1,info->dfile); + mi_int4store(buff+3,pid); + mi_int2store(buff+7,result); + mi_sizestore(buff+9,filepos); + mi_int4store(buff+17,length); + pthread_mutex_lock(&THR_LOCK_maria); + error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); + VOID(my_write(maria_log_file,(byte*) record,info->s->base.reclength,MYF(0))); + if (info->s->base.blobs) + { + MARIA_BLOB *blob,*end; + + for (end=info->blobs+info->s->base.blobs, blob= info->blobs; + blob != end ; + blob++) + { + memcpy_fixed(&pos,record+blob->offset+blob->pack_length,sizeof(char*)); + VOID(my_write(maria_log_file,pos,blob->length,MYF(0))); + } + } + if (!error) + error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno=old_errno; +} diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c new file mode 100644 index 00000000000..e1f1088c6d1 --- /dev/null +++ b/storage/maria/ma_open.c @@ -0,0 +1,1288 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* open a isam-database */ + +#include "ma_fulltext.h" +#include "ma_sp_defs.h" +#include "ma_rt_index.h" +#include + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include +#else +#include /* Prototype for getpid */ +#endif +#endif +#ifdef VMS +#include "static.c" +#endif + +static void setup_key_functions(MARIA_KEYDEF *keyinfo); +#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \ + pos+=size;} + + +#define disk_pos_assert(pos, end_pos) \ +if (pos > end_pos) \ +{ \ + my_errno=HA_ERR_CRASHED; \ + goto err; \ +} + + +/****************************************************************************** +** Return the shared struct if the table is already open. +** In MySQL the server will handle version issues. +******************************************************************************/ + +MARIA_HA *_ma_test_if_reopen(char *filename) +{ + LIST *pos; + + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename) && share->last_version) + return info; + } + return 0; +} + + +/****************************************************************************** + open a MARIA database. + See my_base.h for the handle_locking argument + if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table + is marked crashed or if we are not using locking and the table doesn't + have an open count of 0. +******************************************************************************/ + +MARIA_HA *maria_open(const char *name, int mode, uint open_flags) +{ + int lock_error,kfile,open_mode,save_errno,have_rtree=0; + uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys, + key_parts,unique_key_parts,fulltext_keys,uniques; + char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], + data_name[FN_REFLEN]; + char *disk_cache, *disk_pos, *end_pos; + MARIA_HA info,*m_info,*old_info; + MARIA_SHARE share_buff,*share; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; + ulonglong max_key_file_length, max_data_file_length; + DBUG_ENTER("maria_open"); + + LINT_INIT(m_info); + kfile= -1; + lock_error=1; + errpos=0; + head_length=sizeof(share_buff.state.header); + bzero((byte*) &info,sizeof(info)); + + my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME|MY_APPEND_EXT),MYF(0)); + pthread_mutex_lock(&THR_LOCK_maria); + if (!(old_info=_ma_test_if_reopen(name_buff))) + { + share= &share_buff; + bzero((gptr) &share_buff,sizeof(share_buff)); + share_buff.state.rec_per_key_part=rec_per_key_part; + share_buff.state.key_root=key_root; + share_buff.state.key_del=key_del; + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), + maria_key_cache); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open", + if (strstr(name, "/t1")) + { + my_errno= HA_ERR_CRASHED; + goto err; + }); + if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0) + { + if ((errno != EROFS && errno != EACCES) || + mode != O_RDONLY || + (kfile=my_open(name_buff,(open_mode=O_RDONLY) | O_SHARE,MYF(0))) < 0) + goto err; + } + share->mode=open_mode; + errpos=1; + if (my_read(kfile,(char*) share->state.header.file_version,head_length, + MYF(MY_NABP))) + { + my_errno= HA_ERR_NOT_A_TABLE; + goto err; + } + if (memcmp((byte*) share->state.header.file_version, + (byte*) maria_file_magic, 4)) + { + DBUG_PRINT("error",("Wrong header in %s",name_buff)); + DBUG_DUMP("error_dump",(char*) share->state.header.file_version, + head_length); + my_errno=HA_ERR_NOT_A_TABLE; + goto err; + } + share->options= mi_uint2korr(share->state.header.options); + if (share->options & + ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS | + HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA | + HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE | + HA_OPTION_RELIES_ON_SQL_LAYER)) + { + DBUG_PRINT("error",("wrong options: 0x%lx", share->options)); + my_errno=HA_ERR_OLD_FILE; + goto err; + } + if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) && + ! (open_flags & HA_OPEN_FROM_SQL_LAYER)) + { + DBUG_PRINT("error", ("table cannot be openned from non-sql layer")); + my_errno= HA_ERR_UNSUPPORTED; + goto err; + } + /* Don't call realpath() if the name can't be a link */ + if (!strcmp(name_buff, org_name) || + my_readlink(index_name, org_name, MYF(0)) == -1) + (void) strmov(index_name, org_name); + *strrchr(org_name, '.')= '\0'; + (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT, + MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS); + + info_length=mi_uint2korr(share->state.header.header_length); + base_pos=mi_uint2korr(share->state.header.base_pos); + if (!(disk_cache=(char*) my_alloca(info_length+128))) + { + my_errno=ENOMEM; + goto err; + } + end_pos=disk_cache+info_length; + errpos=2; + + VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0))); + if (!(open_flags & HA_OPEN_TMP_TABLE)) + { + if ((lock_error=my_lock(kfile,F_RDLCK,0L,F_TO_EOF, + MYF(open_flags & HA_OPEN_WAIT_IF_LOCKED ? + 0 : MY_DONT_WAIT))) && + !(open_flags & HA_OPEN_IGNORE_IF_LOCKED)) + goto err; + } + errpos=3; + if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP))) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + len=mi_uint2korr(share->state.header.state_info_length); + keys= (uint) share->state.header.keys; + uniques= (uint) share->state.header.uniques; + fulltext_keys= (uint) share->state.header.fulltext_keys; + key_parts= mi_uint2korr(share->state.header.key_parts); + unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts); + if (len != MARIA_STATE_INFO_SIZE) + { + DBUG_PRINT("warning", + ("saved_state_info_length: %d state_info_length: %d", + len,MARIA_STATE_INFO_SIZE)); + } + share->state_diff_length=len-MARIA_STATE_INFO_SIZE; + + _ma_state_info_read((uchar*) disk_cache, &share->state); + len= mi_uint2korr(share->state.header.base_info_length); + if (len != MARIA_BASE_INFO_SIZE) + { + DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d", + len,MARIA_BASE_INFO_SIZE)); + } + disk_pos= (char*) + _ma_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); + share->state.state_length=base_pos; + + if (!(open_flags & HA_OPEN_FOR_REPAIR) && + ((share->state.changed & STATE_CRASHED) || + ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && + (my_disable_locking && share->state.open_count)))) + { + DBUG_PRINT("error",("Table is marked as crashed")); + my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? + HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); + goto err; + } + + /* sanity check */ + if (share->base.keystart > 65535 || share->base.rec_reflength > 8) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + + key_parts+=fulltext_keys*FT_SEGS; + if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MARIA_MAX_KEY || + key_parts >= MARIA_MAX_KEY * HA_MAX_KEY_SEG) + { + DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); + my_errno=HA_ERR_UNSUPPORTED; + goto err; + } + + /* Correct max_file_length based on length of sizeof(off_t) */ + max_data_file_length= + (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? + (((ulonglong) 1 << (share->base.rec_reflength*8))-1) : + (_ma_safe_mul(share->base.pack_reclength, + (ulonglong) 1 << (share->base.rec_reflength*8))-1); + max_key_file_length= + _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH, + ((ulonglong) 1 << (share->base.key_reflength*8))-1); +#if SIZEOF_OFF_T == 4 + set_if_smaller(max_data_file_length, INT_MAX32); + set_if_smaller(max_key_file_length, INT_MAX32); +#endif +#if USE_RAID && SYSTEM_SIZEOF_OFF_T == 4 + set_if_smaller(max_key_file_length, INT_MAX32); + if (!share->base.raid_type) + { + set_if_smaller(max_data_file_length, INT_MAX32); + } + else + { + set_if_smaller(max_data_file_length, + (ulonglong) share->base.raid_chunks << 31); + } +#elif !defined(USE_RAID) + if (share->base.raid_type) + { + DBUG_PRINT("error",("Table uses RAID but we don't have RAID support")); + my_errno=HA_ERR_UNSUPPORTED; + goto err; + } +#endif + share->base.max_data_file_length=(my_off_t) max_data_file_length; + share->base.max_key_file_length=(my_off_t) max_key_file_length; + + if (share->options & HA_OPTION_COMPRESS_RECORD) + share->base.max_key_length+=2; /* For safety */ + + if (!my_multi_malloc(MY_WME, + &share,sizeof(*share), + &share->state.rec_per_key_part,sizeof(long)*key_parts, + &share->keyinfo,keys*sizeof(MARIA_KEYDEF), + &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF), + &share->keyparts, + (key_parts+unique_key_parts+keys+uniques) * + sizeof(HA_KEYSEG), + &share->rec, + (share->base.fields+1)*sizeof(MARIA_COLUMNDEF), + &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &share->unique_file_name,strlen(name_buff)+1, + &share->index_file_name,strlen(index_name)+1, + &share->data_file_name,strlen(data_name)+1, + &share->state.key_root,keys*sizeof(my_off_t), + &share->state.key_del, + (share->state.header.max_block_size*sizeof(my_off_t)), +#ifdef THREAD + &share->key_root_lock,sizeof(rw_lock_t)*keys, +#endif + &share->mmap_lock,sizeof(rw_lock_t), + NullS)) + goto err; + errpos=4; + *share=share_buff; + memcpy((char*) share->state.rec_per_key_part, + (char*) rec_per_key_part, sizeof(long)*key_parts); + memcpy((char*) share->state.key_root, + (char*) key_root, sizeof(my_off_t)*keys); + memcpy((char*) share->state.key_del, + (char*) key_del, (sizeof(my_off_t) * + share->state.header.max_block_size)); + strmov(share->unique_file_name, name_buff); + share->unique_name_length= strlen(name_buff); + strmov(share->index_file_name, index_name); + strmov(share->data_file_name, data_name); + + share->blocksize=min(IO_SIZE,maria_block_size); + { + HA_KEYSEG *pos=share->keyparts; + for (i=0 ; i < keys ; i++) + { + share->keyinfo[i].share= share; + disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]); + disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, + end_pos); + if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) + have_rtree=1; + set_if_smaller(share->blocksize,share->keyinfo[i].block_length); + share->keyinfo[i].seg=pos; + for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + } + if (share->keyinfo[i].flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + uint sp_segs=SPDIMS*2; + share->keyinfo[i].seg=pos-sp_segs; + share->keyinfo[i].keysegs--; +#else + my_errno=HA_ERR_UNSUPPORTED; + goto err; +#endif + } + else if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (!fulltext_keys) + { /* 4.0 compatibility code, to be removed in 5.0 */ + share->keyinfo[i].seg=pos-FT_SEGS; + share->keyinfo[i].keysegs-=FT_SEGS; + } + else + { + uint j; + share->keyinfo[i].seg=pos; + for (j=0; j < FT_SEGS; j++) + { + *pos= ft_keysegs[j]; + pos[0].language= pos[-1].language; + if (!(pos[0].charset= pos[-1].charset)) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + pos++; + } + } + if (!share->ft2_keyinfo.seg) + { + memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MARIA_KEYDEF)); + share->ft2_keyinfo.keysegs=1; + share->ft2_keyinfo.flag=0; + share->ft2_keyinfo.keylength= + share->ft2_keyinfo.minlength= + share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength; + share->ft2_keyinfo.seg=pos-1; + share->ft2_keyinfo.end=pos; + setup_key_functions(& share->ft2_keyinfo); + } + } + setup_key_functions(share->keyinfo+i); + share->keyinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->length=share->base.rec_reflength; + pos->null_bit=0; + pos->flag=0; /* For purify */ + pos++; + } + for (i=0 ; i < uniques ; i++) + { + disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]); + disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs * + HA_KEYSEG_SIZE, end_pos); + share->uniqueinfo[i].seg=pos; + for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + } + share->uniqueinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->null_bit=0; + pos->flag=0; + pos++; + } + share->ftparsers= 0; + } + + disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, end_pos); + for (i=j=offset=0 ; i < share->base.fields ; i++) + { + disk_pos=_ma_recinfo_read(disk_pos,&share->rec[i]); + share->rec[i].pack_type=0; + share->rec[i].huff_tree=0; + share->rec[i].offset=offset; + if (share->rec[i].type == (int) FIELD_BLOB) + { + share->blobs[j].pack_length= + share->rec[i].length-maria_portable_sizeof_char_ptr;; + share->blobs[j].offset=offset; + j++; + } + offset+=share->rec[i].length; + } + share->rec[i].type=(int) FIELD_LAST; /* End marker */ + + if (! lock_error) + { + VOID(my_lock(kfile,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE))); + lock_error=1; /* Database unlocked */ + } + + if (_ma_open_datafile(&info, share, -1)) + goto err; + errpos=5; + + share->kfile=kfile; + share->this_process=(ulong) getpid(); + share->last_process= share->state.process; + share->base.key_parts=key_parts; + share->base.all_key_parts=key_parts+unique_key_parts; + if (!(share->last_version=share->state.version)) + share->last_version=1; /* Safety */ + share->rec_reflength=share->base.rec_reflength; /* May be changed */ + share->base.margin_key_file_length=(share->base.max_key_file_length - + (keys ? MARIA_INDEX_BLOCK_MARGIN * + share->blocksize * keys : 0)); + share->blocksize=min(IO_SIZE,maria_block_size); + share->data_file_type=STATIC_RECORD; + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + share->data_file_type = COMPRESSED_RECORD; + share->options|= HA_OPTION_READ_ONLY_DATA; + info.s=share; + if (_ma_read_pack_info(&info, + (pbool) + test(!(share->options & + (HA_OPTION_PACK_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD))))) + goto err; + } + else if (share->options & HA_OPTION_PACK_RECORD) + share->data_file_type = DYNAMIC_RECORD; + my_afree((gptr) disk_cache); + _ma_setup_functions(share); +#ifdef THREAD + thr_lock_init(&share->lock); + VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST)); + for (i=0; ikey_root_lock[i], NULL)); + VOID(my_rwlock_init(&share->mmap_lock, NULL)); + if (!thr_lock_inited) + { + /* Probably a single threaded program; Don't use concurrent inserts */ + maria_concurrent_insert=0; + } + else if (maria_concurrent_insert) + { + share->concurrent_insert= + ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE | + HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD)) || + (open_flags & HA_OPEN_TMP_TABLE) || + have_rtree) ? 0 : 1; + if (share->concurrent_insert) + { + share->lock.get_status=_ma_get_status; + share->lock.copy_status=_ma_copy_status; + share->lock.update_status=_ma_update_status; + share->lock.check_status=_ma_check_status; + } + } +#endif + } + else + { + share= old_info->s; + if (mode == O_RDWR && share->mode == O_RDONLY) + { + my_errno=EACCES; /* Can't open in write mode */ + goto err; + } + if (_ma_open_datafile(&info, share, old_info->dfile)) + goto err; + errpos=5; + have_rtree= old_info->maria_rtree_recursion_state != NULL; + } + + /* alloc and set up private structure parts */ + if (!my_multi_malloc(MY_WME, + &m_info,sizeof(MARIA_HA), + &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &info.buff,(share->base.max_key_block_length*2+ + share->base.max_key_length), + &info.lastkey,share->base.max_key_length*3+1, + &info.first_mbr_key, share->base.max_key_length, + &info.filename,strlen(name)+1, + &info.maria_rtree_recursion_state,have_rtree ? 1024 : 0, + NullS)) + goto err; + errpos=6; + + if (!have_rtree) + info.maria_rtree_recursion_state= NULL; + + strmov(info.filename,name); + memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs); + info.lastkey2=info.lastkey+share->base.max_key_length; + + info.s=share; + info.lastpos= HA_OFFSET_ERROR; + info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND); + info.opt_flag=READ_CHECK_USED; + info.this_unique= (ulong) info.dfile; /* Uniq number in process */ + if (share->data_file_type == COMPRESSED_RECORD) + info.this_unique= share->state.unique; + info.this_loop=0; /* Update counter */ + info.last_unique= share->state.unique; + info.last_loop= share->state.update_count; + if (mode == O_RDONLY) + share->options|=HA_OPTION_READ_ONLY_DATA; + info.lock_type=F_UNLCK; + info.quick_mode=0; + info.bulk_insert=0; + info.ft1_to_ft2=0; + info.errkey= -1; + info.page_changed=1; + pthread_mutex_lock(&share->intern_lock); + info.read_record=share->read_record; + share->reopen++; + share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + info.lock_type=F_RDLCK; + share->r_locks++; + share->tot_locks++; + } + if ((open_flags & HA_OPEN_TMP_TABLE) || + (share->options & HA_OPTION_TMP_TABLE)) + { + share->temporary=share->delay_key_write=1; + share->write_flag=MYF(MY_NABP); + share->w_locks++; /* We don't have to update status */ + share->tot_locks++; + info.lock_type=F_WRLCK; + } + if (((open_flags & HA_OPEN_DELAY_KEY_WRITE) || + (share->options & HA_OPTION_DELAY_KEY_WRITE)) && + maria_delay_key_write) + share->delay_key_write=1; + info.state= &share->state.state; /* Change global values by default */ + pthread_mutex_unlock(&share->intern_lock); + + /* Allocate buffer for one record */ + + /* prerequisites: bzero(info) && info->s=share; are met. */ + if (!_ma_alloc_rec_buff(&info, -1, &info.rec_buff)) + goto err; + bzero(info.rec_buff, _ma_get_rec_buff_len(&info, info.rec_buff)); + + *m_info=info; +#ifdef THREAD + thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info); +#endif + m_info->open_list.data=(void*) m_info; + maria_open_list=list_add(maria_open_list,&m_info->open_list); + + pthread_mutex_unlock(&THR_LOCK_maria); + if (maria_log_file >= 0) + { + intern_filename(name_buff,share->index_file_name); + _ma_log(MARIA_LOG_OPEN,m_info,name_buff,(uint) strlen(name_buff)); + } + DBUG_RETURN(m_info); + +err: + save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; + if ((save_errno == HA_ERR_CRASHED) || + (save_errno == HA_ERR_CRASHED_ON_USAGE) || + (save_errno == HA_ERR_CRASHED_ON_REPAIR)) + _ma_report_error(save_errno, name); + switch (errpos) { + case 6: + my_free((gptr) m_info,MYF(0)); + /* fall through */ + case 5: + VOID(my_close(info.dfile,MYF(0))); + if (old_info) + break; /* Don't remove open table */ + /* fall through */ + case 4: + my_free((gptr) share,MYF(0)); + /* fall through */ + case 3: + if (! lock_error) + VOID(my_lock(kfile, F_UNLCK, 0L, F_TO_EOF, MYF(MY_SEEK_NOT_DONE))); + /* fall through */ + case 2: + my_afree((gptr) disk_cache); + /* fall through */ + case 1: + VOID(my_close(kfile,MYF(0))); + /* fall through */ + case 0: + default: + break; + } + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno=save_errno; + DBUG_RETURN (NULL); +} /* maria_open */ + + +byte *_ma_alloc_rec_buff(MARIA_HA *info, ulong length, byte **buf) +{ + uint extra; + uint32 old_length; + LINT_INIT(old_length); + + if (! *buf || length > (old_length=_ma_get_rec_buff_len(info, *buf))) + { + byte *newptr = *buf; + + /* to simplify initial init of info->rec_buf in maria_open and maria_extra */ + if (length == (ulong) -1) + { + length= max(info->s->base.pack_reclength, + info->s->base.max_key_length); + /* Avoid unnecessary realloc */ + if (newptr && length == old_length) + return newptr; + } + + extra= ((info->s->options & HA_OPTION_PACK_RECORD) ? + ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_REC_BUFF_OFFSET : 0); + if (extra && newptr) + newptr-= MARIA_REC_BUFF_OFFSET; + if (!(newptr=(byte*) my_realloc((gptr)newptr, length+extra+8, + MYF(MY_ALLOW_ZERO_PTR)))) + return newptr; + *((uint32 *) newptr)= (uint32) length; + *buf= newptr+(extra ? MARIA_REC_BUFF_OFFSET : 0); + } + return *buf; +} + + +ulonglong _ma_safe_mul(ulonglong a, ulonglong b) +{ + ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */ + + if (!a || max_val / a < b) + return max_val; + return a*b; +} + + /* Set up functions in structs */ + +void _ma_setup_functions(register MARIA_SHARE *share) +{ + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + share->read_record= _ma_read_pack_record; + share->read_rnd= _ma_read_rnd_pack_record; + if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD)) + share->calc_checksum=0; /* No checksum */ + else if (share->options & HA_OPTION_PACK_RECORD) + share->calc_checksum= _ma_checksum; + else + share->calc_checksum= _ma_static_checksum; + } + else if (share->options & HA_OPTION_PACK_RECORD) + { + share->read_record= _ma_read_dynamic_record; + share->read_rnd= _ma_read_rnd_dynamic_record; + share->delete_record= _ma_delete_dynamic_record; + share->compare_record= _ma_cmp_dynamic_record; + share->compare_unique= _ma_cmp_dynamic_unique; + share->calc_checksum= _ma_checksum; + + /* add bits used to pack data to pack_reclength for faster allocation */ + share->base.pack_reclength+= share->base.pack_bits; + if (share->base.blobs) + { + share->update_record= _ma_update_blob_record; + share->write_record= _ma_write_blob_record; + } + else + { + share->write_record= _ma_write_dynamic_record; + share->update_record= _ma_update_dynamic_record; + } + } + else + { + share->read_record= _ma_read_static_record; + share->read_rnd= _ma_read_rnd_static_record; + share->delete_record= _ma_delete_static_record; + share->compare_record= _ma_cmp_static_record; + share->update_record= _ma_update_static_record; + share->write_record= _ma_write_static_record; + share->compare_unique= _ma_cmp_static_unique; + share->calc_checksum= _ma_static_checksum; + } + share->file_read= _ma_nommap_pread; + share->file_write= _ma_nommap_pwrite; + if (!(share->options & HA_OPTION_CHECKSUM)) + share->calc_checksum=0; + return; +} + + +static void setup_key_functions(register MARIA_KEYDEF *keyinfo) +{ + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) + { +#ifdef HAVE_RTREE_KEYS + keyinfo->ck_insert = maria_rtree_insert; + keyinfo->ck_delete = maria_rtree_delete; +#else + DBUG_ASSERT(0); /* maria_open should check it never happens */ +#endif + } + else + { + keyinfo->ck_insert = _ma_ck_write; + keyinfo->ck_delete = _ma_ck_delete; + } + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { /* Simple prefix compression */ + keyinfo->bin_search= _ma_seq_search; + keyinfo->get_key= _ma_get_binary_pack_key; + keyinfo->pack_key= _ma_calc_bin_pack_key_length; + keyinfo->store_key= _ma_store_bin_pack_key; + } + else if (keyinfo->flag & HA_VAR_LENGTH_KEY) + { + keyinfo->get_key= _ma_get_pack_key; + if (keyinfo->seg[0].flag & HA_PACK_KEY) + { /* Prefix compression */ + if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) || + (keyinfo->seg->flag & HA_NULL_PART)) + keyinfo->bin_search= _ma_seq_search; + else + keyinfo->bin_search= _ma_prefix_search; + keyinfo->pack_key= _ma_calc_var_pack_key_length; + keyinfo->store_key= _ma_store_var_pack_key; + } + else + { + keyinfo->bin_search= _ma_seq_search; + keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */ + keyinfo->store_key= _ma_store_static_key; + } + } + else + { + keyinfo->bin_search= _ma_bin_search; + keyinfo->get_key= _ma_get_static_key; + keyinfo->pack_key= _ma_calc_static_key_length; + keyinfo->store_key= _ma_store_static_key; + } + return; +} + + +/* + Function to save and store the header in the index file (.MYI) +*/ + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) +{ + uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + uchar *ptr=buff; + uint i, keys= (uint) state->header.keys, + key_blocks=state->header.max_block_size; + DBUG_ENTER("_ma_state_info_write"); + + memcpy_fixed(ptr,&state->header,sizeof(state->header)); + ptr+=sizeof(state->header); + + /* open_count must be first because of _ma_mark_file_changed ! */ + mi_int2store(ptr,state->open_count); ptr +=2; + *ptr++= (uchar)state->changed; *ptr++= state->sortkey; + mi_rowstore(ptr,state->state.records); ptr +=8; + mi_rowstore(ptr,state->state.del); ptr +=8; + mi_rowstore(ptr,state->split); ptr +=8; + mi_sizestore(ptr,state->dellink); ptr +=8; + mi_sizestore(ptr,state->state.key_file_length); ptr +=8; + mi_sizestore(ptr,state->state.data_file_length); ptr +=8; + mi_sizestore(ptr,state->state.empty); ptr +=8; + mi_sizestore(ptr,state->state.key_empty); ptr +=8; + mi_int8store(ptr,state->auto_increment); ptr +=8; + mi_int8store(ptr,(ulonglong) state->state.checksum);ptr +=8; + mi_int4store(ptr,state->process); ptr +=4; + mi_int4store(ptr,state->unique); ptr +=4; + mi_int4store(ptr,state->status); ptr +=4; + mi_int4store(ptr,state->update_count); ptr +=4; + + ptr+=state->state_diff_length; + + for (i=0; i < keys; i++) + { + mi_sizestore(ptr,state->key_root[i]); ptr +=8; + } + for (i=0; i < key_blocks; i++) + { + mi_sizestore(ptr,state->key_del[i]); ptr +=8; + } + if (pWrite & 2) /* From isamchk */ + { + uint key_parts= mi_uint2korr(state->header.key_parts); + mi_int4store(ptr,state->sec_index_changed); ptr +=4; + mi_int4store(ptr,state->sec_index_used); ptr +=4; + mi_int4store(ptr,state->version); ptr +=4; + mi_int8store(ptr,state->key_map); ptr +=8; + mi_int8store(ptr,(ulonglong) state->create_time); ptr +=8; + mi_int8store(ptr,(ulonglong) state->recover_time); ptr +=8; + mi_int8store(ptr,(ulonglong) state->check_time); ptr +=8; + mi_sizestore(ptr,state->rec_per_key_rows); ptr+=8; + for (i=0 ; i < key_parts ; i++) + { + mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4; + } + } + + if (pWrite & 1) + DBUG_RETURN(my_pwrite(file,(char*) buff, (uint) (ptr-buff), 0L, + MYF(MY_NABP | MY_THREADSAFE))); + DBUG_RETURN(my_write(file, (char*) buff, (uint) (ptr-buff), + MYF(MY_NABP))); +} + + +uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state) +{ + uint i,keys,key_parts,key_blocks; + memcpy_fixed(&state->header,ptr, sizeof(state->header)); + ptr +=sizeof(state->header); + keys=(uint) state->header.keys; + key_parts=mi_uint2korr(state->header.key_parts); + key_blocks=state->header.max_block_size; + + state->open_count = mi_uint2korr(ptr); ptr +=2; + state->changed= (bool) *ptr++; + state->sortkey = (uint) *ptr++; + state->state.records= mi_rowkorr(ptr); ptr +=8; + state->state.del = mi_rowkorr(ptr); ptr +=8; + state->split = mi_rowkorr(ptr); ptr +=8; + state->dellink= mi_sizekorr(ptr); ptr +=8; + state->state.key_file_length = mi_sizekorr(ptr); ptr +=8; + state->state.data_file_length= mi_sizekorr(ptr); ptr +=8; + state->state.empty = mi_sizekorr(ptr); ptr +=8; + state->state.key_empty= mi_sizekorr(ptr); ptr +=8; + state->auto_increment=mi_uint8korr(ptr); ptr +=8; + state->state.checksum=(ha_checksum) mi_uint8korr(ptr); ptr +=8; + state->process= mi_uint4korr(ptr); ptr +=4; + state->unique = mi_uint4korr(ptr); ptr +=4; + state->status = mi_uint4korr(ptr); ptr +=4; + state->update_count=mi_uint4korr(ptr); ptr +=4; + + ptr+= state->state_diff_length; + + for (i=0; i < keys; i++) + { + state->key_root[i]= mi_sizekorr(ptr); ptr +=8; + } + for (i=0; i < key_blocks; i++) + { + state->key_del[i] = mi_sizekorr(ptr); ptr +=8; + } + state->sec_index_changed = mi_uint4korr(ptr); ptr +=4; + state->sec_index_used = mi_uint4korr(ptr); ptr +=4; + state->version = mi_uint4korr(ptr); ptr +=4; + state->key_map = mi_uint8korr(ptr); ptr +=8; + state->create_time = (time_t) mi_sizekorr(ptr); ptr +=8; + state->recover_time =(time_t) mi_sizekorr(ptr); ptr +=8; + state->check_time = (time_t) mi_sizekorr(ptr); ptr +=8; + state->rec_per_key_rows=mi_sizekorr(ptr); ptr +=8; + for (i=0 ; i < key_parts ; i++) + { + state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4; + } + return ptr; +} + + +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead) +{ + char buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + + if (!maria_single_user) + { + if (pRead) + { + if (my_pread(file, buff, state->state_length,0L, MYF(MY_NABP))) + return (MY_FILE_ERROR); + } + else if (my_read(file, buff, state->state_length,MYF(MY_NABP))) + return (MY_FILE_ERROR); + _ma_state_info_read((uchar*) buff, state); + } + return 0; +} + + +/**************************************************************************** +** store and read of MARIA_BASE_INFO +****************************************************************************/ + +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) +{ + uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff; + + mi_sizestore(ptr,base->keystart); ptr +=8; + mi_sizestore(ptr,base->max_data_file_length); ptr +=8; + mi_sizestore(ptr,base->max_key_file_length); ptr +=8; + mi_rowstore(ptr,base->records); ptr +=8; + mi_rowstore(ptr,base->reloc); ptr +=8; + mi_int4store(ptr,base->mean_row_length); ptr +=4; + mi_int4store(ptr,base->reclength); ptr +=4; + mi_int4store(ptr,base->pack_reclength); ptr +=4; + mi_int4store(ptr,base->min_pack_length); ptr +=4; + mi_int4store(ptr,base->max_pack_length); ptr +=4; + mi_int4store(ptr,base->min_block_length); ptr +=4; + mi_int4store(ptr,base->fields); ptr +=4; + mi_int4store(ptr,base->pack_fields); ptr +=4; + *ptr++=base->rec_reflength; + *ptr++=base->key_reflength; + *ptr++=base->keys; + *ptr++=base->auto_key; + mi_int2store(ptr,base->pack_bits); ptr +=2; + mi_int2store(ptr,base->blobs); ptr +=2; + mi_int2store(ptr,base->max_key_block_length); ptr +=2; + mi_int2store(ptr,base->max_key_length); ptr +=2; + mi_int2store(ptr,base->extra_alloc_bytes); ptr +=2; + *ptr++= base->extra_alloc_procent; + *ptr++= base->raid_type; + mi_int2store(ptr,base->raid_chunks); ptr +=2; + mi_int4store(ptr,base->raid_chunksize); ptr +=4; + bzero(ptr,6); ptr +=6; /* extra */ + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +uchar *_ma_n_base_info_read(uchar *ptr, MARIA_BASE_INFO *base) +{ + base->keystart = mi_sizekorr(ptr); ptr +=8; + base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; + base->max_key_file_length = mi_sizekorr(ptr); ptr +=8; + base->records = (ha_rows) mi_sizekorr(ptr); ptr +=8; + base->reloc = (ha_rows) mi_sizekorr(ptr); ptr +=8; + base->mean_row_length = mi_uint4korr(ptr); ptr +=4; + base->reclength = mi_uint4korr(ptr); ptr +=4; + base->pack_reclength = mi_uint4korr(ptr); ptr +=4; + base->min_pack_length = mi_uint4korr(ptr); ptr +=4; + base->max_pack_length = mi_uint4korr(ptr); ptr +=4; + base->min_block_length = mi_uint4korr(ptr); ptr +=4; + base->fields = mi_uint4korr(ptr); ptr +=4; + base->pack_fields = mi_uint4korr(ptr); ptr +=4; + + base->rec_reflength = *ptr++; + base->key_reflength = *ptr++; + base->keys= *ptr++; + base->auto_key= *ptr++; + base->pack_bits = mi_uint2korr(ptr); ptr +=2; + base->blobs = mi_uint2korr(ptr); ptr +=2; + base->max_key_block_length= mi_uint2korr(ptr); ptr +=2; + base->max_key_length = mi_uint2korr(ptr); ptr +=2; + base->extra_alloc_bytes = mi_uint2korr(ptr); ptr +=2; + base->extra_alloc_procent = *ptr++; + base->raid_type= *ptr++; + base->raid_chunks= mi_uint2korr(ptr); ptr +=2; + base->raid_chunksize= mi_uint4korr(ptr); ptr +=4; + /* TO BE REMOVED: Fix for old RAID files */ + if (base->raid_type == 0) + { + base->raid_chunks=0; + base->raid_chunksize=0; + } + + ptr+=6; + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_keydef +---------------------------------------------------------------------------*/ + +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef) +{ + uchar buff[MARIA_KEYDEF_SIZE]; + uchar *ptr=buff; + + *ptr++ = (uchar) keydef->keysegs; + *ptr++ = keydef->key_alg; /* Rtree or Btree */ + mi_int2store(ptr,keydef->flag); ptr +=2; + mi_int2store(ptr,keydef->block_length); ptr +=2; + mi_int2store(ptr,keydef->keylength); ptr +=2; + mi_int2store(ptr,keydef->minlength); ptr +=2; + mi_int2store(ptr,keydef->maxlength); ptr +=2; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef) +{ + keydef->keysegs = (uint) *ptr++; + keydef->key_alg = *ptr++; /* Rtree or Btree */ + + keydef->flag = mi_uint2korr(ptr); ptr +=2; + keydef->block_length = mi_uint2korr(ptr); ptr +=2; + keydef->keylength = mi_uint2korr(ptr); ptr +=2; + keydef->minlength = mi_uint2korr(ptr); ptr +=2; + keydef->maxlength = mi_uint2korr(ptr); ptr +=2; + keydef->block_size = keydef->block_length/MARIA_MIN_KEY_BLOCK_LENGTH-1; + keydef->underflow_block_length=keydef->block_length/3; + keydef->version = 0; /* Not saved */ + keydef->parser = &ft_default_parser; + keydef->ftparser_nr = 0; + return ptr; +} + +/*************************************************************************** +** maria_keyseg +***************************************************************************/ + +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg) +{ + uchar buff[HA_KEYSEG_SIZE]; + uchar *ptr=buff; + ulong pos; + + *ptr++= keyseg->type; + *ptr++= keyseg->language; + *ptr++= keyseg->null_bit; + *ptr++= keyseg->bit_start; + *ptr++= keyseg->bit_end; + *ptr++= keyseg->bit_length; + mi_int2store(ptr,keyseg->flag); ptr+=2; + mi_int2store(ptr,keyseg->length); ptr+=2; + mi_int4store(ptr,keyseg->start); ptr+=4; + pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos; + mi_int4store(ptr, pos); + ptr+=4; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg) +{ + keyseg->type = *ptr++; + keyseg->language = *ptr++; + keyseg->null_bit = *ptr++; + keyseg->bit_start = *ptr++; + keyseg->bit_end = *ptr++; + keyseg->bit_length = *ptr++; + keyseg->flag = mi_uint2korr(ptr); ptr +=2; + keyseg->length = mi_uint2korr(ptr); ptr +=2; + keyseg->start = mi_uint4korr(ptr); ptr +=4; + keyseg->null_pos = mi_uint4korr(ptr); ptr +=4; + keyseg->charset=0; /* Will be filled in later */ + if (keyseg->null_bit) + keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7)); + else + { + keyseg->bit_pos= (uint16)keyseg->null_pos; + keyseg->null_pos= 0; + } + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_uniquedef +---------------------------------------------------------------------------*/ + +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def) +{ + uchar buff[MARIA_UNIQUEDEF_SIZE]; + uchar *ptr=buff; + + mi_int2store(ptr,def->keysegs); ptr+=2; + *ptr++= (uchar) def->key; + *ptr++ = (uchar) def->null_are_equal; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *def) +{ + def->keysegs = mi_uint2korr(ptr); + def->key = ptr[2]; + def->null_are_equal=ptr[3]; + return ptr+4; /* 1 extra byte */ +} + +/*************************************************************************** +** MARIA_COLUMNDEF +***************************************************************************/ + +uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo) +{ + uchar buff[MARIA_COLUMNDEF_SIZE]; + uchar *ptr=buff; + + mi_int2store(ptr,recinfo->type); ptr +=2; + mi_int2store(ptr,recinfo->length); ptr +=2; + *ptr++ = recinfo->null_bit; + mi_int2store(ptr,recinfo->null_pos); ptr+= 2; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo) +{ + recinfo->type= mi_sint2korr(ptr); ptr +=2; + recinfo->length=mi_uint2korr(ptr); ptr +=2; + recinfo->null_bit= (uint8) *ptr++; + recinfo->null_pos=mi_uint2korr(ptr); ptr +=2; + return ptr; +} + +/************************************************************************** +Open data file with or without RAID +We can't use dup() here as the data file descriptors need to have different +active seek-positions. + +The argument file_to_dup is here for the future if there would on some OS +exist a dup()-like call that would give us two different file descriptors. +*************************************************************************/ + +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup __attribute__((unused))) +{ +#ifdef USE_RAID + if (share->base.raid_type) + { + info->dfile=my_raid_open(share->data_file_name, + share->mode | O_SHARE, + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize, + MYF(MY_WME | MY_RAID)); + } + else +#endif + info->dfile=my_open(share->data_file_name, share->mode | O_SHARE, + MYF(MY_WME)); + return info->dfile >= 0 ? 0 : 1; +} + + +int _ma_open_keyfile(MARIA_SHARE *share) +{ + if ((share->kfile=my_open(share->unique_file_name, share->mode | O_SHARE, + MYF(MY_WME))) < 0) + return 1; + return 0; +} + + +/* + Disable all indexes. + + SYNOPSIS + maria_disable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Disable all indexes. + + RETURN + 0 ok +*/ + +int maria_disable_indexes(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + maria_clear_all_keys_active(share->state.key_map); + return 0; +} + + +/* + Enable all indexes + + SYNOPSIS + maria_enable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Enable all indexes. The indexes might have been disabled + by maria_disable_index() before. + The function works only if both data and indexes are empty, + otherwise a repair is required. + To be sure, call handler::delete_all_rows() before. + + RETURN + 0 ok + HA_ERR_CRASHED data or index is non-empty. +*/ + +int maria_enable_indexes(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share= info->s; + + if (share->state.state.data_file_length || + (share->state.state.key_file_length != share->base.keystart)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + error= HA_ERR_CRASHED; + } + else + maria_set_all_keys_active(share->state.key_map, share->base.keys); + return error; +} + + +/* + Test if indexes are disabled. + + SYNOPSIS + maria_indexes_are_disabled() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Test if indexes are disabled. + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] +*/ + +int maria_indexes_are_disabled(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + return (! maria_is_any_key_active(share->state.key_map) && share->base.keys); +} diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c new file mode 100644 index 00000000000..eb99e299f9a --- /dev/null +++ b/storage/maria/ma_packrec.c @@ -0,0 +1,1346 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to compressed records */ + +#include "maria_def.h" + +#define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */ + +#if INT_MAX > 65536L +#define BITS_SAVED 32 +#define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */ +#else +#define BITS_SAVED 16 +#define MAX_QUICK_TABLE_BITS 6 +#endif + +#define get_bit(BU) ((BU)->bits ? \ + (BU)->current_byte & ((maria_bit_type) 1 << --(BU)->bits) :\ + (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\ + (BU)->current_byte & ((maria_bit_type) 1 << (BITS_SAVED-1)))) +#define skip_to_next_byte(BU) ((BU)->bits&=~7) +#define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count)) + +#define decode_bytes_test_bit(bit) \ + if (low_byte & (1 << (7-bit))) \ + pos++; \ + if (*pos & IS_CHAR) \ + { bits-=(bit+1); break; } \ + pos+= *pos + +#define OFFSET_TABLE_SIZE 512 + +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table,byte **intervall_buff, + uint16 *tmp_buff); +static void make_quick_table(uint16 *to_table,uint16 *decode_table, + uint *next_free,uint value,uint bits, + uint max_bits); +static void fill_quick_table(uint16 *table,uint bits, uint max_bits, + uint value); +static uint copy_decode_table(uint16 *to_pos,uint offset, + uint16 *decode_table); +static uint find_longest_bitstream(uint16 *table, uint16 *end); +static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field, + MARIA_BIT_BUFF *buff, + uchar *to, + uchar *end); +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end); +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + uchar *to,uchar *end); +static uint decode_pos(MARIA_BIT_BUFF *bit_buff,MARIA_DECODE_TREE *decode_tree); +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer,uint length); +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count); +static void fill_buffer(MARIA_BIT_BUFF *bit_buff); +static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); +#ifdef HAVE_MMAP +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, + uchar *header); +#endif + +static maria_bit_type mask[]= +{ + 0x00000000, + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, + 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, +#if BITS_SAVED > 16 + 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, + 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, + 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff, +#endif + }; + + + /* Read all packed info, allocate memory and fix field structs */ + +my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys) +{ + File file; + int diff_length; + uint i,trees,huff_tree_bits,rec_reflength,length; + uint16 *decode_table,*tmp_buff; + ulong elements,intervall_length; + char *disk_cache,*intervall_buff; + uchar header[32]; + MARIA_SHARE *share=info->s; + MARIA_BIT_BUFF bit_buff; + DBUG_ENTER("_ma_read_pack_info"); + + if (maria_quick_table_bits < 4) + maria_quick_table_bits=4; + else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS) + maria_quick_table_bits=MAX_QUICK_TABLE_BITS; + + file=info->dfile; + my_errno=0; + if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP))) + { + if (!my_errno) + my_errno=HA_ERR_END_OF_FILE; + goto err0; + } + if (memcmp((byte*) header, (byte*) maria_pack_file_magic, 3)) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err0; + } + share->pack.version= header[3]; + share->pack.header_length= uint4korr(header+4); + share->min_pack_length=(uint) uint4korr(header+8); + share->max_pack_length=(uint) uint4korr(header+12); + set_if_bigger(share->base.pack_reclength,share->max_pack_length); + elements=uint4korr(header+16); + intervall_length=uint4korr(header+20); + trees=uint2korr(header+24); + share->pack.ref_length=header[26]; + rec_reflength=header[27]; + diff_length=(int) rec_reflength - (int) share->base.rec_reflength; + if (fix_keys) + share->rec_reflength=rec_reflength; + share->base.min_block_length=share->min_pack_length+1; + if (share->min_pack_length > 254) + share->base.min_block_length+=2; + + if (!(share->decode_trees=(MARIA_DECODE_TREE*) + my_malloc((uint) (trees*sizeof(MARIA_DECODE_TREE)+ + intervall_length*sizeof(byte)), + MYF(MY_WME)))) + goto err0; + intervall_buff=(byte*) (share->decode_trees+trees); + + length=(uint) (elements*2+trees*(1 << maria_quick_table_bits)); + if (!(share->decode_tables=(uint16*) + my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+ + (uint) (share->pack.header_length+7), + MYF(MY_WME | MY_ZEROFILL)))) + goto err1; + tmp_buff=share->decode_tables+length; + disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE); + + if (my_read(file,disk_cache, + (uint) (share->pack.header_length-sizeof(header)), + MYF(MY_NABP))) + goto err2; + + huff_tree_bits=max_bit(trees ? trees-1 : 0); + init_bit_buffer(&bit_buff, (uchar*) disk_cache, + (uint) (share->pack.header_length-sizeof(header))); + /* Read new info for each field */ + for (i=0 ; i < share->base.fields ; i++) + { + share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5); + share->rec[i].pack_type=(uint) get_bits(&bit_buff,6); + share->rec[i].space_length_bits=get_bits(&bit_buff,5); + share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, + huff_tree_bits); + share->rec[i].unpack=get_unpack_function(share->rec+i); + } + skip_to_next_byte(&bit_buff); + decode_table=share->decode_tables; + for (i=0 ; i < trees ; i++) + if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table, + &intervall_buff,tmp_buff)) + goto err3; + decode_table=(uint16*) + my_realloc((gptr) share->decode_tables, + (uint) ((byte*) decode_table - (byte*) share->decode_tables), + MYF(MY_HOLD_ON_ERROR)); + { + long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables); + share->decode_tables=decode_table; + for (i=0 ; i < trees ; i++) + share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table, + diff, uint16*); + } + + /* Fix record-ref-length for keys */ + if (fix_keys) + { + for (i=0 ; i < share->base.keys ; i++) + { + share->keyinfo[i].keylength+=(uint16) diff_length; + share->keyinfo[i].minlength+=(uint16) diff_length; + share->keyinfo[i].maxlength+=(uint16) diff_length; + share->keyinfo[i].seg[share->keyinfo[i].keysegs].length= + (uint16) rec_reflength; + } + } + + if (bit_buff.error || bit_buff.pos < bit_buff.end) + goto err3; + + DBUG_RETURN(0); + +err3: + my_errno=HA_ERR_WRONG_IN_RECORD; +err2: + my_free((gptr) share->decode_tables,MYF(0)); +err1: + my_free((gptr) share->decode_trees,MYF(0)); +err0: + DBUG_RETURN(1); +} + + + /* Read on huff-code-table from datafile */ + +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table, byte **intervall_buff, + uint16 *tmp_buff) +{ + uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits, + next_free_offset; + uint16 *ptr,*end; + + LINT_INIT(ptr); + if (!get_bits(bit_buff,1)) + { + min_chr=get_bits(bit_buff,8); + elements=get_bits(bit_buff,9); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + intervall_length=0; + ptr=tmp_buff; + } + else + { + min_chr=0; + elements=get_bits(bit_buff,15); + intervall_length=get_bits(bit_buff,16); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + decode_tree->quick_table_bits=0; + ptr= *decode_table; + } + size=elements*2-2; + + for (end=ptr+size ; ptr < end ; ptr++) + { + if (get_bit(bit_buff)) + *ptr= (uint16) get_bits(bit_buff,offset_bits); + else + *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr)); + } + skip_to_next_byte(bit_buff); + + decode_tree->table= *decode_table; + decode_tree->intervalls= *intervall_buff; + if (! intervall_length) + { + table_bits=find_longest_bitstream(tmp_buff, tmp_buff+OFFSET_TABLE_SIZE); + if (table_bits == (uint) ~0) + return 1; + if (table_bits > maria_quick_table_bits) + table_bits=maria_quick_table_bits; + next_free_offset= (1 << table_bits); + make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits, + table_bits); + (*decode_table)+= next_free_offset; + decode_tree->quick_table_bits=table_bits; + } + else + { + (*decode_table)=end; + bit_buff->pos-= bit_buff->bits/8; + memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length); + (*intervall_buff)+=intervall_length; + bit_buff->pos+=intervall_length; + bit_buff->bits=0; + } + return 0; +} + + +static void make_quick_table(uint16 *to_table, uint16 *decode_table, + uint *next_free_offset, uint value, uint bits, + uint max_bits) +{ + if (!bits--) + { + to_table[value]= (uint16) *next_free_offset; + *next_free_offset=copy_decode_table(to_table, *next_free_offset, + decode_table); + return; + } + if (!(*decode_table & IS_CHAR)) + { + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + decode_table++; + value|= (1 << bits); + if (!(*decode_table & IS_CHAR)) + { + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + return; +} + + +static void fill_quick_table(uint16 *table, uint bits, uint max_bits, + uint value) +{ + uint16 *end; + value|=(max_bits-bits) << 8; + for (end=table+ (1 << bits) ; + table < end ; + *table++ = (uint16) value | IS_CHAR) ; +} + + +static uint copy_decode_table(uint16 *to_pos, uint offset, + uint16 *decode_table) +{ + uint prev_offset; + prev_offset= offset; + + if (!(*decode_table & IS_CHAR)) + { + to_pos[offset]=2; + offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table); + } + else + { + to_pos[offset]= *decode_table; + offset+=2; + } + decode_table++; + + if (!(*decode_table & IS_CHAR)) + { + to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1); + offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table); + } + else + to_pos[prev_offset+1]= *decode_table; + return offset; +} + + +static uint find_longest_bitstream(uint16 *table, uint16 *end) +{ + uint length=1,length2; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + return ~0; + length=find_longest_bitstream(next, end)+1; + } + table++; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + return ~0; + length2=find_longest_bitstream(table+ *table, end)+1; + length=max(length,length2); + } + return length; +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_pack_record() + info A pointer to MARIA_HA. + filepos File offset of the record. + buf RETURN The buffer to receive the record. + + RETURN + 0 on success + HA_ERR_WRONG_IN_RECORD or -1 on error +*/ + +int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, byte *buf) +{ + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("maria_read_pack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + file=info->dfile; + if (_ma_pack_get_block_info(info, &block_info, file, filepos)) + goto err; + if (my_read(file,(byte*) info->rec_buff + block_info.offset , + block_info.rec_len - block_info.offset, MYF(MY_NABP))) + goto panic; + info->update|= HA_STATE_AKTIV; + DBUG_RETURN(_ma_pack_rec_unpack(info,buf,info->rec_buff,block_info.rec_len)); +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + DBUG_RETURN(-1); +} + + + +int _ma_pack_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, + ulong reclength) +{ + byte *end_field; + reg3 MARIA_COLUMNDEF *end; + MARIA_COLUMNDEF *current_field; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_pack_rec_unpack"); + + init_bit_buffer(&info->bit_buff, (uchar*) from,reclength); + + for (current_field=share->rec, end=current_field+share->base.fields ; + current_field < end ; + current_field++,to=end_field) + { + end_field=to+current_field->length; + (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to, + (uchar*) end_field); + } + if (! info->bit_buff.error && + info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end) + DBUG_RETURN(0); + info->update&= ~HA_STATE_AKTIV; + DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); +} /* _ma_pack_rec_unpack */ + + + /* Return function to unpack field */ + +static void (*get_unpack_function(MARIA_COLUMNDEF *rec)) +(MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, uchar *, uchar *) +{ + switch (rec->base_type) { + case FIELD_SKIP_ZERO: + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_skip_zero; + return &uf_skip_zero; + case FIELD_NORMAL: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + return &uf_space_normal; + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_normal; + return &decode_bytes; + case FIELD_SKIP_ENDSPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_endspace_selected; + return &uf_space_endspace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_endspace_selected; + return &uf_endspace; + case FIELD_SKIP_PRESPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_prespace_selected; + return &uf_space_prespace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_prespace_selected; + return &uf_prespace; + case FIELD_CONSTANT: + return &uf_constant; + case FIELD_INTERVALL: + return &uf_intervall; + case FIELD_ZERO: + case FIELD_CHECK: + return &uf_zero; + case FIELD_BLOB: + return &uf_blob; + case FIELD_VARCHAR: + if (rec->length <= 256) /* 255 + 1 byte length */ + return &uf_varchar1; + return &uf_varchar2; + case FIELD_LAST: + default: + return 0; /* This should never happend */ + } +} + + /* The different functions to unpack a field */ + +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + { + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff,to,end); + bzero((char*) end,rec->space_length_bits); + } +} + +static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + +static void uf_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } +} + +static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); +} + +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + + +static void uf_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); +} + + +static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } +} + +static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); +} + +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff,(uchar*) to,end); + bzero((char*) end,rec->space_length_bits); +} + +static void uf_constant(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + uchar *to, + uchar *end) +{ + memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to)); +} + +static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + reg1 uint field_length=(uint) (end-to); + memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff, + rec->huff_tree), + (size_t) field_length); +} + + +/*ARGSUSED*/ +static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)), + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + uchar *to, uchar *end) +{ + bzero((char*) to,(uint) (end-to)); +} + +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end) +{ + if (get_bit(bit_buff)) + bzero((byte*) to,(end-to)); + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + uint pack_length=(uint) (end-to)-maria_portable_sizeof_char_ptr; + if (bit_buff->blob_pos+length > bit_buff->blob_end) + { + bit_buff->error=1; + bzero((byte*) to,(end-to)); + return; + } + decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length); + _ma_store_blob_length((byte*) to,pack_length,length); + memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, + sizeof(char*)); + bit_buff->blob_pos+=length; + } +} + + +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]= 0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + *to= (uchar) length; + decode_bytes(rec,bit_buff,to+1,to+1+length); + } +} + + +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + uchar *to, uchar *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]=to[1]=0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + int2store(to,length); + decode_bytes(rec,bit_buff,to+2,to+2+length); + } +} + + /* Functions to decode of buffer of bits */ + +#if BITS_SAVED == 64 + +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,uchar *to, + uchar *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->decode_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits <= 32) + { + if (bit_buff->pos > bit_buff->end+4) + { + bit_buff->error=1; + return; /* Can't be right */ + } + bit_buff->current_byte= (bit_buff->current_byte << 32) + + ((((uint) bit_buff->pos[3])) + + (((uint) bit_buff->pos[2]) << 8) + + (((uint) bit_buff->pos[1]) << 16) + + (((uint) bit_buff->pos[0]) << 24)); + bit_buff->pos+=4; + bits+=32; + } + /* First use info in quick_table */ + low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + *to++ = (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + for (;;) + { + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} + +#else + +static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, + uchar *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->huff_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits < table_bits) + { + if (bit_buff->pos > bit_buff->end+1) + { + bit_buff->error=1; + return; /* Can't be right */ + } +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) << 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + if (bits) /* We must have at leasts 9 bits */ + { + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos++; + bits+=8; + } + else + { + bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) + + ((uint) ((uchar) bit_buff->pos[1])); + bit_buff->pos+=2; + bits+=16; + } +#endif + } + /* First use info in quick_table */ + low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + *to++ = (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + for (;;) + { + if (bits < 8) + { /* We don't need to check end */ +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) << 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos+=1; + bits+=8; +#endif + } + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = (uchar) *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} +#endif /* BIT_SAVED == 64 */ + + +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, MARIA_DECODE_TREE *decode_tree) +{ + uint16 *pos=decode_tree->table; + for (;;) + { + if (get_bit(bit_buff)) + pos++; + if (*pos & IS_CHAR) + return (uint) (*pos & ~IS_CHAR); + pos+= *pos; + } +} + + +int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, + register my_off_t filepos, + my_bool skip_deleted_blocks) +{ + uint b_type; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_pack_record"); + + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, + share->pack.ref_length, skip_deleted_blocks)) + goto err; + b_type= _ma_pack_get_block_info(info,&block_info,-1, filepos); + } + else + b_type= _ma_pack_get_block_info(info,&block_info,info->dfile,filepos); + if (b_type) + goto err; /* Error code is already set */ +#ifndef DBUG_OFF + if (block_info.rec_len > share->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks)) + goto err; + } + else + { + if (my_read(info->dfile,(byte*) info->rec_buff + block_info.offset, + block_info.rec_len-block_info.offset, + MYF(MY_NABP))) + goto err; + } + info->packed_length=block_info.rec_len; + info->lastpos=filepos; + info->nextpos=block_info.filepos+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info,buf,info->rec_buff, + block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + + + /* Read and process header from a huff-record-file */ + +uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, + my_off_t filepos) +{ + uchar *header=info->header; + uint head_length,ref_length; + LINT_INIT(ref_length); + + if (file >= 0) + { + ref_length=maria->s->pack.ref_length; + /* + We can't use my_pread() here because maria_read_rnd_pack_record assumes + position is ok + */ + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,ref_length,MYF(MY_NABP))) + return BLOCK_FATAL_ERROR; + DBUG_DUMP("header",(byte*) header,ref_length); + } + head_length= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + head_length+= read_pack_length((uint) maria->s->pack.version, + header + head_length, &info->blob_len); + if (!(_ma_alloc_rec_buff(maria,info->rec_len + info->blob_len, + &maria->rec_buff))) + return BLOCK_FATAL_ERROR; /* not enough memory */ + maria->bit_buff.blob_pos=(uchar*) maria->rec_buff+info->rec_len; + maria->bit_buff.blob_end= maria->bit_buff.blob_pos+info->blob_len; + maria->blob_length=info->blob_len; + } + info->filepos=filepos+head_length; + if (file > 0) + { + info->offset=min(info->rec_len, ref_length - head_length); + memcpy(maria->rec_buff, header+head_length, info->offset); + } + return 0; +} + + + /* rutines for bit buffer */ + /* Note buffer must be 6 byte bigger than longest row */ + +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer, uint length) +{ + bit_buff->pos=buffer; + bit_buff->end=buffer+length; + bit_buff->bits=bit_buff->error=0; + bit_buff->current_byte=0; /* Avoid purify errors */ +} + +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff, uint count) +{ + uint tmp; + count-=bit_buff->bits; + tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count; + fill_buffer(bit_buff); + bit_buff->bits=BITS_SAVED - count; + return tmp+(bit_buff->current_byte >> (BITS_SAVED - count)); +} + + /* Fill in empty bit_buff->current_byte from buffer */ + /* Sets bit_buff->error if buffer is exhausted */ + +static void fill_buffer(MARIA_BIT_BUFF *bit_buff) +{ + if (bit_buff->pos >= bit_buff->end) + { + bit_buff->error= 1; + bit_buff->current_byte=0; + return; + } +#if BITS_SAVED == 64 + bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) + + (((uint) ((uchar) bit_buff->pos[6])) << 8) + + (((uint) ((uchar) bit_buff->pos[5])) << 16) + + (((uint) ((uchar) bit_buff->pos[4])) << 24) + + ((ulonglong) + ((((uint) ((uchar) bit_buff->pos[3]))) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32)); + bit_buff->pos+=8; +#else +#if BITS_SAVED == 32 + bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) bit_buff->pos[0])) << 24)); + bit_buff->pos+=4; +#else + bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+ + (((uint) ((uchar) bit_buff->pos[0])) << 8)); + bit_buff->pos+=2; +#endif +#endif +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +/***************************************************************************** + Some redefined functions to handle files when we are using memmap +*****************************************************************************/ +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +#ifdef HAVE_MMAP + +static int _ma_read_mempack_record(MARIA_HA *info,my_off_t filepos,byte *buf); +static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *,my_off_t, my_bool); + +my_bool _ma_memmap_file(MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_memmap_file"); + + if (!info->s->file_map) + { + if (my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)) < + share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning",("File isn't extended for memmap")); + DBUG_RETURN(0); + } + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + DBUG_RETURN(0); + } + info->opt_flag|= MEMMAP_USED; + info->read_record= share->read_record= _ma_read_mempack_record; + share->read_rnd= _ma_read_rnd_mempack_record; + DBUG_RETURN(1); +} + + +void _ma_unmap_file(MARIA_HA *info) +{ + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); +} + + +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, + uchar *header) +{ + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->blob_len); + /* _ma_alloc_rec_buff sets my_errno on error */ + if (!(_ma_alloc_rec_buff(maria, info->blob_len, + &maria->rec_buff))) + return 0; /* not enough memory */ + maria->bit_buff.blob_pos=(uchar*) maria->rec_buff; + maria->bit_buff.blob_end= (uchar*) maria->rec_buff + info->blob_len; + } + return header; +} + + +static int _ma_read_mempack_record(MARIA_HA *info, my_off_t filepos, byte *buf) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos; + DBUG_ENTER("maria_read_mempack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + if (!(pos= (byte*) _ma_mempack_get_block_info(info,&block_info, + (uchar*) share->file_map+ + filepos))) + DBUG_RETURN(-1); + DBUG_RETURN(_ma_pack_rec_unpack(info, buf, pos, block_info.rec_len)); +} + + +/*ARGSUSED*/ +static int _ma_read_rnd_mempack_record(MARIA_HA *info, byte *buf, + register my_off_t filepos, + my_bool skip_deleted_blocks + __attribute__((unused))) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos,*start; + DBUG_ENTER("_ma_read_rnd_mempack_record"); + + if (filepos >= share->state.state.data_file_length) + { + my_errno=HA_ERR_END_OF_FILE; + goto err; + } + if (!(pos= (byte*) _ma_mempack_get_block_info(info,&block_info, + (uchar*) + (start=share->file_map+ + filepos)))) + goto err; +#ifndef DBUG_OFF + if (block_info.rec_len > info->s->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + info->packed_length=block_info.rec_len; + info->lastpos=filepos; + info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info,buf,pos, block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + +#endif /* HAVE_MMAP */ + + /* Save length of row */ + +uint _ma_save_pack_length(uint version, byte *block_buff, ulong length) +{ + if (length < 254) + { + *(uchar*) block_buff= (uchar) length; + return 1; + } + if (length <= 65535) + { + *(uchar*) block_buff=254; + int2store(block_buff+1,(uint) length); + return 3; + } + *(uchar*) block_buff=255; + if (version == 1) /* old format */ + { + DBUG_ASSERT(length <= 0xFFFFFF); + int3store(block_buff + 1, (ulong) length); + return 4; + } + else + { + int4store(block_buff + 1, (ulong) length); + return 5; + } +} + + +static uint read_pack_length(uint version, const uchar *buf, ulong *length) +{ + if (buf[0] < 254) + { + *length= buf[0]; + return 1; + } + else if (buf[0] == 254) + { + *length= uint2korr(buf + 1); + return 3; + } + if (version == 1) /* old format */ + { + *length= uint3korr(buf + 1); + return 4; + } + else + { + *length= uint4korr(buf + 1); + return 5; + } +} + + +uint _ma_calc_pack_length(uint version, ulong length) +{ + return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5; +} diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c new file mode 100644 index 00000000000..78864e2c9ac --- /dev/null +++ b/storage/maria/ma_page.c @@ -0,0 +1,160 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read and write key blocks */ + +#include "maria_def.h" + + /* Fetch a key-page in memory */ + +uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, + uchar *buff, int return_buffer) +{ + uchar *tmp; + uint page_size; + DBUG_ENTER("_ma_fetch_keypage"); + DBUG_PRINT("enter",("page: %ld",page)); + + tmp=(uchar*) key_cache_read(info->s->key_cache, + info->s->kfile, page, level, (byte*) buff, + (uint) keyinfo->block_length, + (uint) keyinfo->block_length, + return_buffer); + if (tmp == info->buff) + info->buff_used=1; + else if (!tmp) + { + DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno)); + info->last_keypage=HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + info->last_keypage=page; + page_size=maria_getint(tmp); + if (page_size < 4 || page_size > keyinfo->block_length) + { + DBUG_PRINT("error",("page %lu had wrong page length: %u", + (ulong) page, page_size)); + DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); + info->last_keypage = HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno = HA_ERR_CRASHED; + tmp = 0; + } + DBUG_RETURN(tmp); +} /* _ma_fetch_keypage */ + + + /* Write a key-page on disk */ + +int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff) +{ + reg3 uint length; + DBUG_ENTER("_ma_write_keypage"); + +#ifndef FAST /* Safety check */ + if (page < info->s->base.keystart || + page+keyinfo->block_length > info->state->key_file_length || + (page & (MARIA_MIN_KEY_BLOCK_LENGTH-1))) + { + DBUG_PRINT("error",("Trying to write inside key status region: key_start: %lu length: %lu page: %lu", + (long) info->s->base.keystart, + (long) info->state->key_file_length, + (long) page)); + my_errno=EINVAL; + DBUG_RETURN((-1)); + } + DBUG_PRINT("page",("write page at: %lu",(long) page,buff)); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); +#endif + + if ((length=keyinfo->block_length) > IO_SIZE*2 && + info->state->key_file_length != page+length) + length= ((maria_getint(buff)+IO_SIZE-1) & (uint) ~(IO_SIZE-1)); +#ifdef HAVE_purify + { + length=maria_getint(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + length=keyinfo->block_length; + } +#endif + DBUG_RETURN((key_cache_write(info->s->key_cache, + info->s->kfile,page, level, (byte*) buff,length, + (uint) keyinfo->block_length, + (int) ((info->lock_type != F_UNLCK) || + info->s->delay_key_write)))); +} /* maria_write_keypage */ + + + /* Remove page from disk */ + +int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level) +{ + my_off_t old_link; + char buff[8]; + DBUG_ENTER("_ma_dispose"); + DBUG_PRINT("enter",("pos: %ld", (long) pos)); + + old_link=info->s->state.key_del[keyinfo->block_size]; + info->s->state.key_del[keyinfo->block_size]=pos; + mi_sizestore(buff,old_link); + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + DBUG_RETURN(key_cache_write(info->s->key_cache, + info->s->kfile, pos , level, buff, + sizeof(buff), + (uint) keyinfo->block_length, + (int) (info->lock_type != F_UNLCK))); +} /* _ma_dispose */ + + + /* Make new page on disk */ + +my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) +{ + my_off_t pos; + char buff[8]; + DBUG_ENTER("_ma_new"); + + if ((pos=info->s->state.key_del[keyinfo->block_size]) == HA_OFFSET_ERROR) + { + if (info->state->key_file_length >= + info->s->base.max_key_file_length - keyinfo->block_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + DBUG_RETURN(HA_OFFSET_ERROR); + } + pos=info->state->key_file_length; + info->state->key_file_length+= keyinfo->block_length; + } + else + { + if (!key_cache_read(info->s->key_cache, + info->s->kfile, pos, level, + buff, + (uint) sizeof(buff), + (uint) keyinfo->block_length,0)) + pos= HA_OFFSET_ERROR; + else + info->s->state.key_del[keyinfo->block_size]=mi_sizekorr(buff); + } + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + DBUG_PRINT("exit",("Pos: %ld",(long) pos)); + DBUG_RETURN(pos); +} /* _ma_new */ diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c new file mode 100644 index 00000000000..90239b3943b --- /dev/null +++ b/storage/maria/ma_panic.c @@ -0,0 +1,124 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "ma_fulltext.h" + +/* + Stop usage of Maria + + SYNOPSIS + maria_panic() + flag HA_PANIC_CLOSE: All maria files (tables and log) are closed. + maria_end() is called. + HA_PANIC_WRITE: All misam files are unlocked and + all changed data in single user maria is + written to file + HA_PANIC_READ All maria files that was locked when + maria_panic(HA_PANIC_WRITE) was done is + locked. A maria_readinfo() is done for + all single user files to get changes + in database + + RETURN + 0 ok + # error number in case of error +*/ + +int maria_panic(enum ha_panic_function flag) +{ + int error=0; + LIST *list_element,*next_open; + MARIA_HA *info; + DBUG_ENTER("maria_panic"); + + pthread_mutex_lock(&THR_LOCK_maria); + for (list_element=maria_open_list ; list_element ; list_element=next_open) + { + next_open=list_element->next; /* Save if close */ + info=(MARIA_HA*) list_element->data; + switch (flag) { + case HA_PANIC_CLOSE: + pthread_mutex_unlock(&THR_LOCK_maria); /* Not exactly right... */ + if (maria_close(info)) + error=my_errno; + pthread_mutex_lock(&THR_LOCK_maria); + break; + case HA_PANIC_WRITE: /* Do this to free databases */ +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->options & HA_OPTION_READ_ONLY_DATA) + break; +#endif + if (flush_key_blocks(info->s->key_cache, info->s->kfile, FLUSH_RELEASE)) + error=my_errno; + if (info->opt_flag & WRITE_CACHE_USED) + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + if (info->opt_flag & READ_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK),1); + } + if (info->lock_type != F_UNLCK && ! info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->kfile >= 0 && my_close(info->s->kfile,MYF(0))) + error = my_errno; + if (info->dfile >= 0 && my_close(info->dfile,MYF(0))) + error = my_errno; + info->s->kfile=info->dfile= -1; /* Files aren't open anymore */ + break; +#endif + case HA_PANIC_READ: /* Restore to before WRITE */ +#ifdef CANT_OPEN_FILES_TWICE + { /* Open closed files */ + char name_buff[FN_REFLEN]; + if (info->s->kfile < 0) + if ((info->s->kfile= my_open(fn_format(name_buff,info->filename,"", + N_NAME_IEXT,4),info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + if (info->dfile < 0) + { + if ((info->dfile= my_open(fn_format(name_buff,info->filename,"", + N_NAME_DEXT,4),info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + info->rec_cache.file=info->dfile; + } + } +#endif + if (info->was_locked) + { + if (maria_lock_database(info, info->was_locked)) + error=my_errno; + info->was_locked=0; + } + break; + } + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (flag == HA_PANIC_CLOSE) + maria_end(); + if (!error) + DBUG_RETURN(0); + DBUG_RETURN(my_errno=error); +} /* maria_panic */ diff --git a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c new file mode 100644 index 00000000000..f387f2b7de3 --- /dev/null +++ b/storage/maria/ma_preload.c @@ -0,0 +1,117 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Preload indexes into key cache +*/ + +#include "maria_def.h" + + +/* + Preload pages of the index file for a table into the key cache + + SYNOPSIS + maria_preload() + info open table + map map of indexes to preload into key cache + ignore_leaves only non-leaves pages are to be preloaded + + RETURN VALUE + 0 if a success. error code - otherwise. + + NOTES. + At present pages for all indexes are preloaded. + In future only pages for indexes specified in the key_map parameter + of the table will be preloaded. +*/ + +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves) +{ + uint i; + ulong length, block_length= 0; + uchar *buff= NULL; + MARIA_SHARE* share= info->s; + uint keys= share->state.header.keys; + MARIA_KEYDEF *keyinfo= share->keyinfo; + my_off_t key_file_length= share->state.state.key_file_length; + my_off_t pos= share->base.keystart; + DBUG_ENTER("maria_preload"); + + if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos) + DBUG_RETURN(0); + + block_length= keyinfo[0].block_length; + + /* Check whether all indexes use the same block size */ + for (i= 1 ; i < keys ; i++) + { + if (keyinfo[i].block_length != block_length) + DBUG_RETURN(my_errno= HA_ERR_NON_UNIQUE_BLOCK_SIZE); + } + + length= info->preload_buff_size/block_length * block_length; + set_if_bigger(length, block_length); + + if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME)))) + DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); + + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_RELEASE)) + goto err; + + do + { + /* Read the next block of index file into the preload buffer */ + if ((my_off_t) length > (key_file_length-pos)) + length= (ulong) (key_file_length-pos); + if (my_pread(share->kfile, (byte*) buff, length, pos, MYF(MY_FAE|MY_FNABP))) + goto err; + + if (ignore_leaves) + { + uchar *end= buff+length; + do + { + if (_ma_test_if_nod(buff)) + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, block_length)) + goto err; + } + pos+= block_length; + } + while ((buff+= block_length) != end); + buff= end-length; + } + else + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, length)) + goto err; + pos+= length; + } + } + while (pos != key_file_length); + + my_free((char*) buff, MYF(0)); + DBUG_RETURN(0); + +err: + my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(my_errno= errno); +} diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c new file mode 100644 index 00000000000..0f6883f4c9d --- /dev/null +++ b/storage/maria/ma_range.c @@ -0,0 +1,244 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Gives a approximated number of how many records there is between two keys. + Used when optimizing querries. + */ + +#include "maria_def.h" +#include "ma_rt_index.h" + +static ha_rows _ma_record_pos(MARIA_HA *info,const byte *key,uint key_len, + enum ha_rkey_function search_flag); +static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key, + uint key_len,uint nextflag,my_off_t pos); +static uint _ma_keynr(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *page, + uchar *keypos,uint *ret_max_key); + + +/* + Estimate how many records there is in a given range + + SYNOPSIS + maria_records_in_range() + info MARIA handler + inx Index to use + min_key Min key. Is = 0 if no min range + max_key Max key. Is = 0 if no max range + + NOTES + We should ONLY return 0 if there is no rows in range + + RETURN + HA_POS_ERROR error (or we can't estimate number of rows) + number Estimated number of rows +*/ + + +ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key, + key_range *max_key) +{ + ha_rows start_pos,end_pos,res; + DBUG_ENTER("maria_records_in_range"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(HA_POS_ERROR); + + if (fast_ma_readinfo(info)) + DBUG_RETURN(HA_POS_ERROR); + info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + { + uchar * key_buff; + uint start_key_len; + + key_buff= info->lastkey+info->s->base.max_key_length; + start_key_len= _ma_pack_key(info,inx, key_buff, + (uchar*) min_key->key, min_key->length, + (HA_KEYSEG**) 0); + res= maria_rtree_estimate(info, inx, key_buff, start_key_len, + maria_read_vec[min_key->flag]); + res= res ? res : 1; /* Don't return 0 */ + break; + } +#endif + case HA_KEY_ALG_BTREE: + default: + start_pos= (min_key ? + _ma_record_pos(info, min_key->key, min_key->length, + min_key->flag) : + (ha_rows) 0); + end_pos= (max_key ? + _ma_record_pos(info, max_key->key, max_key->length, + max_key->flag) : + info->state->records+ (ha_rows) 1); + res= (end_pos < start_pos ? (ha_rows) 0 : + (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos)); + if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) + res=HA_POS_ERROR; + } + + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + fast_ma_writeinfo(info); + + DBUG_PRINT("info",("records: %ld",(ulong) (res))); + DBUG_RETURN(res); +} + + + /* Find relative position (in records) for key in index-tree */ + +static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len, + enum ha_rkey_function search_flag) +{ + uint inx=(uint) info->lastinx, nextflag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx; + uchar *key_buff; + double pos; + + DBUG_ENTER("_ma_record_pos"); + DBUG_PRINT("enter",("search_flag: %d",search_flag)); + + if (key_len == 0) + key_len=USE_WHOLE_KEY; + key_buff=info->lastkey+info->s->base.max_key_length; + key_len= _ma_pack_key(info,inx,key_buff,(uchar*) key,key_len, + (HA_KEYSEG**) 0); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg, + (uchar*) key_buff,key_len);); + nextflag=maria_read_vec[search_flag]; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + key_len=USE_WHOLE_KEY; + + pos= _ma_search_pos(info,keyinfo,key_buff,key_len, + nextflag | SEARCH_SAVE_BUFF, + info->s->state.key_root[inx]); + if (pos >= 0.0) + { + DBUG_PRINT("exit",("pos: %ld",(ulong) (pos*info->state->records))); + DBUG_RETURN((ulong) (pos*info->state->records+0.5)); + } + DBUG_RETURN(HA_POS_ERROR); +} + + + /* This is a modified version of _ma_search */ + /* Returns offset for key in indextable (decimal 0.0 <= x <= 1.0) */ + +static double _ma_search_pos(register MARIA_HA *info, + register MARIA_KEYDEF *keyinfo, + uchar *key, uint key_len, uint nextflag, + register my_off_t pos) +{ + int flag; + uint nod_flag,keynr,max_keynr; + my_bool after_key; + uchar *keypos,*buff; + double offset; + DBUG_ENTER("_ma_search_pos"); + + if (pos == HA_OFFSET_ERROR) + DBUG_RETURN(0.5); + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1))) + goto err; + flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, + &keypos,info->lastkey, &after_key); + nod_flag=_ma_test_if_nod(buff); + keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr); + + if (flag) + { + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); /* error */ + /* + Didn't found match. keypos points at next (bigger) key + Try to find a smaller, better matching key. + Matches keynr + [0-1] + */ + if (flag > 0 && ! nod_flag) + offset= 1.0; + else if ((offset= _ma_search_pos(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); + } + else + { + /* + Found match. Keypos points at the start of the found key + Matches keynr+1 + */ + offset=1.0; /* Matches keynr+1 */ + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + /* + There may be identical keys in the tree. Try to match on of those. + Matches keynr + [0-1] + */ + if ((offset= _ma_search_pos(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); /* Read error */ + } + } + DBUG_PRINT("info",("keynr: %d offset: %g max_keynr: %d nod: %d flag: %d", + keynr,offset,max_keynr,nod_flag,flag)); + DBUG_RETURN((keynr+offset)/(max_keynr+1)); +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1.0); +} + + + /* Get keynummer of current key and max number of keys in nod */ + +static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, + uchar *keypos, uint *ret_max_key) +{ + uint nod_flag,keynr,max_key; + uchar t_buff[HA_MAX_KEY_BUFF],*end; + + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag); + return (uint) (keypos-page)/(keyinfo->keylength+nod_flag); + } + + max_key=keynr=0; + t_buff[0]=0; /* Safety */ + while (page < end) + { + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff)) + return 0; /* Error */ + max_key++; + if (page == keypos) + keynr=max_key; + } + *ret_max_key=max_key; + return(keynr); +} diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c new file mode 100644 index 00000000000..5f65cd2b213 --- /dev/null +++ b/storage/maria/ma_rename.c @@ -0,0 +1,61 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Rename a table +*/ + +#include "ma_fulltext.h" + +int maria_rename(const char *old_name, const char *new_name) +{ + char from[FN_REFLEN],to[FN_REFLEN]; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + DBUG_ENTER("maria_rename"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename old_table"); + _ma_check_table_is_closed(new_name,"rename new table2"); +#endif +#ifdef USE_RAID + { + MARIA_HA *info; + if (!(info=maria_open(old_name, O_RDONLY, 0))) + DBUG_RETURN(my_errno); + raid_type = info->s->base.raid_type; + raid_chunks = info->s->base.raid_chunks; + maria_close(info); + } +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename raidcheck"); +#endif +#endif /* USE_RAID */ + + fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + if (my_rename_with_symlink(from, to, MYF(MY_WME))) + DBUG_RETURN(my_errno); + fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + DBUG_RETURN(my_raid_rename(from, to, raid_chunks, MYF(MY_WME)) ? my_errno : + 0); +#endif + DBUG_RETURN(my_rename_with_symlink(from, to,MYF(MY_WME)) ? my_errno : 0); +} diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c new file mode 100644 index 00000000000..503e8989936 --- /dev/null +++ b/storage/maria/ma_rfirst.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read first row through a specfic key */ + +int maria_rfirst(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rfirst"); + info->lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + DBUG_RETURN(maria_rnext(info,buf,inx)); +} /* maria_rfirst */ diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c new file mode 100644 index 00000000000..abcce1a2582 --- /dev/null +++ b/storage/maria/ma_rkey.c @@ -0,0 +1,144 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read record based on a key */ + +#include "maria_def.h" +#include "ma_rt_index.h" + + /* Read a record using key */ + /* Ordinary search_flag is 0 ; Give error if no record with key */ + +int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len, + enum ha_rkey_function search_flag) +{ + uchar *key_buff; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + HA_KEYSEG *last_used_keyseg; + uint pack_key_length, use_key_length, nextflag; + DBUG_ENTER("maria_rkey"); + DBUG_PRINT("enter", ("base: %lx buf: %lx inx: %d search_flag: %d", + (long) info, (long) buf, inx, search_flag)); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->last_key_func= search_flag; + keyinfo= share->keyinfo + inx; + + if (info->once_flags & USE_PACKED_KEYS) + { + info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */ + /* + key is already packed!; This happens when we are using a MERGE TABLE + */ + key_buff=info->lastkey+info->s->base.max_key_length; + pack_key_length= key_len; + bmove(key_buff,key,key_len); + last_used_keyseg= 0; + } + else + { + if (key_len == 0) + key_len=USE_WHOLE_KEY; + /* Save the packed key for later use in the second buffer of lastkey. */ + key_buff=info->lastkey+info->s->base.max_key_length; + pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, (uchar*) key, + key_len, &last_used_keyseg); + /* Save packed_key_length for use by the MERGE engine. */ + info->pack_key_length= pack_key_length; + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, pack_key_length);); + } + + if (fast_ma_readinfo(info)) + goto err; + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + + nextflag=maria_read_vec[search_flag]; + use_key_length=pack_key_length; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + use_key_length=USE_WHOLE_KEY; + + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + goto err; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!_ma_search(info, keyinfo, key_buff, use_key_length, + maria_read_vec[search_flag], info->s->state.key_root[inx])) + { + while (info->lastpos >= info->state->data_file_length) + { + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + exact key, because the keys are sorted according to position + */ + + if (_ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) + break; + } + } + } + if (share->concurrent_insert) + rw_unlock(&share->key_root_lock[inx]); + + /* Calculate length of the found key; Used by maria_rnext_same */ + if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg && + info->lastpos != HA_OFFSET_ERROR) + info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey, + last_used_keyseg); + else + info->last_rkey_length= pack_key_length; + + /* Check if we don't want to have record back, only error message */ + if (!buf) + DBUG_RETURN(info->lastpos == HA_OFFSET_ERROR ? my_errno : 0); + + if (!(*info->read_record)(info,info->lastpos,buf)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + + info->lastpos = HA_OFFSET_ERROR; /* Didn't find key */ + + /* Store last used key as a base for read next */ + memcpy(info->lastkey,key_buff,pack_key_length); + info->last_rkey_length= pack_key_length; + bzero((char*) info->lastkey+pack_key_length,info->s->base.rec_reflength); + info->lastkey_length=pack_key_length+info->s->base.rec_reflength; + + if (search_flag == HA_READ_AFTER_KEY) + info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */ +err: + DBUG_RETURN(my_errno); +} /* _ma_rkey */ diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c new file mode 100644 index 00000000000..8ce26afa78d --- /dev/null +++ b/storage/maria/ma_rlast.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read last row with the same key as the previous read. */ + +int maria_rlast(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rlast"); + info->lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_NEXT_FOUND; + DBUG_RETURN(maria_rprev(info,buf,inx)); +} /* maria_rlast */ diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c new file mode 100644 index 00000000000..8f342c6a8d2 --- /dev/null +++ b/storage/maria/ma_rnext.c @@ -0,0 +1,122 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#include "ma_rt_index.h" + + /* + Read next row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rnext(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + uint flag; + DBUG_ENTER("maria_rnext"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_BIGGER; /* Read next */ + if (info->lastpos == HA_OFFSET_ERROR && info->update & HA_STATE_PREV_FOUND) + flag=0; /* Read first */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + changed= _ma_test_if_changed(info); + if (!flag) + { + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + error=maria_rtree_get_first(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + error= _ma_search_first(info,info->s->keyinfo+inx, + info->s->state.key_root[inx]); + break; + } + } + else + { + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + /* + Note that rtree doesn't support that the table + may be changed since last call, so we do need + to skip rows inserted by other threads like in btree + */ + error= maria_rtree_get_next(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!changed) + error= _ma_search_next(info,info->s->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + info->s->state.key_root[inx]); + else + error= _ma_search(info,info->s->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY,flag, info->s->state.key_root[inx]); + } + } + + if (info->s->concurrent_insert) + { + if (!error) + { + while (info->lastpos >= info->state->data_file_length) + { + /* Skip rows inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,info->s->keyinfo+inx, + info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + } + } + rw_unlock(&info->s->key_root_lock[inx]); + } + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info,info->lastpos,buf)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_PRINT("error",("Got error: %d, errno: %d",error, my_errno)); + DBUG_RETURN(my_errno); +} /* maria_rnext */ diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c new file mode 100644 index 00000000000..b53639073e3 --- /dev/null +++ b/storage/maria/ma_rnext_same.c @@ -0,0 +1,105 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include "ma_rt_index.h" + + /* + Read next row with the same key as previous read, but abort if + the key changes. + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rnext_same(MARIA_HA *info, byte *buf) +{ + int error; + uint inx,not_used[2]; + MARIA_KEYDEF *keyinfo; + DBUG_ENTER("maria_rnext_same"); + + if ((int) (inx=info->lastinx) < 0 || info->lastpos == HA_OFFSET_ERROR) + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + keyinfo=info->s->keyinfo+inx; + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch (keyinfo->key_alg) + { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if ((error=maria_rtree_find_next(info,inx, + maria_read_vec[info->last_key_func]))) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->lastpos= HA_OFFSET_ERROR; + break; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!(info->update & HA_STATE_RNEXT_SAME)) + { + /* First rnext_same; Store old key */ + memcpy(info->lastkey2,info->lastkey,info->last_rkey_length); + } + for (;;) + { + if ((error= _ma_search_next(info,keyinfo,info->lastkey, + info->lastkey_length,SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + if (ha_key_cmp(keyinfo->seg, info->lastkey, info->lastkey2, + info->last_rkey_length, SEARCH_FIND, not_used)) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->lastpos= HA_OFFSET_ERROR; + break; + } + /* Skip rows that are inserted by other threads since we got a lock */ + if (info->lastpos < info->state->data_file_length) + break; + } + } + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info,info->lastpos,buf)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rnext_same */ diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c new file mode 100644 index 00000000000..8dd4498cf8b --- /dev/null +++ b/storage/maria/ma_rprev.c @@ -0,0 +1,88 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* + Read previous row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rprev(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + register uint flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_rprev"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_SMALLER; /* Read previous */ + if (info->lastpos == HA_OFFSET_ERROR && info->update & HA_STATE_NEXT_FOUND) + flag=0; /* Read last */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + changed= _ma_test_if_changed(info); + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + if (!flag) + error= _ma_search_last(info, share->keyinfo+inx, + share->state.key_root[inx]); + else if (!changed) + error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + share->state.key_root[inx]); + else + error= _ma_search(info,share->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY, flag, share->state.key_root[inx]); + + if (share->concurrent_insert) + { + if (!error) + { + while (info->lastpos >= info->state->data_file_length) + { + /* Skip rows that are inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length, + SEARCH_SMALLER, + share->state.key_root[inx]))) + break; + } + } + rw_unlock(&share->key_root_lock[inx]); + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_PREV_FOUND; + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info,info->lastpos,buf)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rprev */ diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c new file mode 100644 index 00000000000..2f01c0e92c5 --- /dev/null +++ b/storage/maria/ma_rrnd.c @@ -0,0 +1,60 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read a record with random-access. The position to the record must + get by MARIA_HA. The next record can be read with pos= MARIA_POS_ERROR */ + + +#include "maria_def.h" + +/* + Read a row based on position. + If filepos= HA_OFFSET_ERROR then read next row + Return values + Returns one of following values: + 0 = Ok. + HA_ERR_RECORD_DELETED = Record is deleted. + HA_ERR_END_OF_FILE = EOF. +*/ + +int maria_rrnd(MARIA_HA *info, byte *buf, register my_off_t filepos) +{ + my_bool skip_deleted_blocks; + DBUG_ENTER("maria_rrnd"); + + skip_deleted_blocks=0; + + if (filepos == HA_OFFSET_ERROR) + { + skip_deleted_blocks=1; + if (info->lastpos == HA_OFFSET_ERROR) /* First read ? */ + filepos= info->s->pack.header_length; /* Read first record */ + else + filepos= info->nextpos; + } + + if (info->once_flags & RRND_PRESERVE_LASTINX) + info->once_flags&= ~RRND_PRESERVE_LASTINX; + else + info->lastinx= -1; /* Can't forward or backward */ + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + + DBUG_RETURN ((*info->s->read_rnd)(info,buf,filepos,skip_deleted_blocks)); +} diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c new file mode 100644 index 00000000000..913ae3b4370 --- /dev/null +++ b/storage/maria/ma_rsame.c @@ -0,0 +1,66 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* + ** Find current row with read on position or read on key + ** If inx >= 0 find record using key + ** Return values: + ** 0 = Ok. + ** HA_ERR_KEY_NOT_FOUND = Row is deleted + ** HA_ERR_END_OF_FILE = End of file + */ + + +int maria_rsame(MARIA_HA *info, byte *record, int inx) +{ + DBUG_ENTER("maria_rsame"); + + if (inx != -1 && ! maria_is_key_active(info->s->state.key_map, inx)) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + if (info->lastpos == HA_OFFSET_ERROR || info->update & HA_STATE_DELETED) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */ + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* Read row from data file */ + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (inx >= 0) + { + info->lastinx=inx; + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->lastpos); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY, + SEARCH_SAME, + info->s->state.key_root[inx])); + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + } + + if (!(*info->read_record)(info,info->lastpos,record)) + DBUG_RETURN(0); + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); +} /* maria_rsame */ diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c new file mode 100644 index 00000000000..e30d41dd46c --- /dev/null +++ b/storage/maria/ma_rsamepos.c @@ -0,0 +1,56 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* read record through position and fix key-position */ +/* As maria_rsame but supply a position */ + +#include "maria_def.h" + + + /* + ** If inx >= 0 update index pointer + ** Returns one of the following values: + ** 0 = Ok. + ** HA_ERR_KEY_NOT_FOUND = Row is deleted + ** HA_ERR_END_OF_FILE = End of file + */ + +int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, my_off_t filepos) +{ + DBUG_ENTER("maria_rsame_with_pos"); + + if (inx < -1 || ! maria_is_key_active(info->s->state.key_map, inx)) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if ((*info->s->read_rnd)(info,record,filepos,0)) + { + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); + } + info->lastpos=filepos; + info->lastinx=inx; + if (inx >= 0) + { + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->lastpos); + info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */ + } + DBUG_RETURN(0); +} /* maria_rsame_pos */ diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c new file mode 100644 index 00000000000..ff10ae72027 --- /dev/null +++ b/storage/maria/ma_rt_index.c @@ -0,0 +1,1081 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +#define REINSERT_BUFFER_INC 10 +#define PICK_BY_AREA +/*#define PICK_BY_PERIMETER*/ + +typedef struct st_page_level +{ + uint level; + my_off_t offs; +} stPageLevel; + +typedef struct st_page_list +{ + ulong n_pages; + ulong m_pages; + stPageLevel *pages; +} stPageList; + + +/* + Find next key in r-tree according to search_flag recursively + + NOTES + Used in maria_rtree_find_first() and maria_rtree_find_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint search_flag, + uint nod_cmp_flag, my_off_t page, int level) +{ + uchar *k; + uchar *last; + uint nod_flag; + int res; + uchar *page_buf; + int k_len; + uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + if (!(res = maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, + info->last_rkey_length, nod_cmp_flag))) + { + switch ((res = maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, + _ma_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: /* error */ + case -1: + goto err1; + } + } + } + else + { + /* this is a leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, + info->last_rkey_length, search_flag)) + { + uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + info->maria_rtree_recursion_depth = level; + *saved_key = last - page_buf; + + if (after_key < last) + { + info->int_keypos = info->buff; + info->int_maxpos = info->buff + (last - after_key); + memcpy(info->buff, after_key, last - after_key); + info->buff_used = 0; + } + else + { + info->buff_used = 1; + } + + res = 0; + goto ok; + } + } + } + info->lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Find first key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_first() + info Handler to MARIA file + uint keynr Key number to use + key Key to search for + key_length Length of 'key' + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_length, + uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + /* Save searched key */ + memcpy(info->first_mbr_key, key, keyinfo->keylength - + info->s->base.rec_reflength); + info->last_rkey_length = key_length; + + info->maria_rtree_recursion_depth = -1; + info->buff_used = 1; + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Find next key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_next() + info Handler to MARIA file + uint keynr Key number to use + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (info->update & HA_STATE_DELETED) + return maria_rtree_find_first(info, keynr, info->lastkey, info->lastkey_length, + search_flag); + + if (!info->buff_used) + { + uchar *key= info->int_keypos; + + while (key < info->int_maxpos) + { + if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, key, + info->last_rkey_length, search_flag)) + { + uchar *after_key = key + keyinfo->keylength; + + info->lastpos= _ma_dpos(info, 0, after_key); + memcpy(info->lastkey, key, info->lastkey_length); + + if (after_key < info->int_maxpos) + info->int_keypos= after_key; + else + info->buff_used= 1; + return 0; + } + key+= keyinfo->keylength; + } + } + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Get next key in r-tree recursively + + NOTES + Used in maria_rtree_get_first() and maria_rtree_get_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_length, + my_off_t page, int level) +{ + uchar *k; + uchar *last; + uint nod_flag; + int res; + uchar *page_buf; + uint k_len; + uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + return -1; + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + if (!nod_flag) + { + /* Only leaf pages contain data references. */ + /* Need to check next key with data reference. */ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + } + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + switch ((res = maria_rtree_get_req(info, keyinfo, key_length, + _ma_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: + case -1: /* error */ + goto err1; + } + } + else + { + /* this is a leaf */ + uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + + info->maria_rtree_recursion_depth = level; + *saved_key = k - page_buf; + + if (after_key < last) + { + info->int_keypos = (uchar*)saved_key; + memcpy(info->buff, page_buf, keyinfo->block_length); + info->int_maxpos = rt_PAGE_END(info->buff); + info->buff_used = 0; + } + else + { + info->buff_used = 1; + } + + res = 0; + goto ok; + } + } + info->lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Get first key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + info->maria_rtree_recursion_depth = -1; + info->buff_used = 1; + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); +} + + +/* + Get next key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (!info->buff_used) + { + uint k_len = keyinfo->keylength - info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(info->int_keypos) */ + uchar *key = info->buff + *(int*)info->int_keypos + k_len + + info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(key) */ + uchar *after_key = key + k_len + info->s->base.rec_reflength; + + info->lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength); + + *(int*)info->int_keypos = key - info->buff; + if (after_key >= info->int_maxpos) + { + info->buff_used = 1; + } + + return 0; + } + else + { + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); + } +} + + +/* + Choose non-leaf better key for insertion +*/ + +#ifdef PICK_BY_PERIMETER +static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double perimeter; + double best_perimeter; + uchar *best_key; + uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + uchar *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_perimeter); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if ((increase = maria_rtree_perimeter_increase(keyinfo->seg, k, key, key_length, + &perimeter)) == -1) + return NULL; + if ((increase < best_incr)|| + (increase == best_incr && perimeter < best_perimeter)) + { + best_key = k; + best_perimeter= perimeter; + best_incr = increase; + } + } + return best_key; +} + +#endif /*PICK_BY_PERIMETER*/ + +#ifdef PICK_BY_AREA +static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double area; + double best_area; + uchar *best_key; + uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + uchar *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_area); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + /* The following is safe as -1.0 is an exact number */ + if ((increase = maria_rtree_area_increase(keyinfo->seg, k, key, key_length, + &area)) == -1.0) + return NULL; + /* The following should be safe, even if we compare doubles */ + if (increase < best_incr) + { + best_key = k; + best_area = area; + best_incr = increase; + } + else + { + /* The following should be safe, even if we compare doubles */ + if ((increase == best_incr) && (area < best_area)) + { + best_key = k; + best_area = area; + best_incr = increase; + } + } + } + return best_key; +} + +#endif /*PICK_BY_AREA*/ + +/* + Go down and insert key into tree + + RETURN + -1 Error + 0 Child was not split + 1 Child was split +*/ + +static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t page, my_off_t *new_page, + int ins_level, int level) +{ + uchar *k; + uint nod_flag; + uchar *page_buf; + int res; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */ + (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */ + { + if ((k = maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf, + nod_flag)) == NULL) + goto err1; + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), new_page, ins_level, level + 1))) + { + case 0: /* child was not split */ + { + maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + case 1: /* child was split */ + { + uchar *new_key = page_buf + keyinfo->block_length + nod_flag; + /* set proper MBR for key */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + /* add new key for new page */ + _ma_kpointer(info, new_key - nod_flag, *new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, *new_page)) + goto err1; + res = maria_rtree_add_key(info, keyinfo, new_key, key_length, + page_buf, new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + default: + case -1: /* error */ + { + goto err1; + } + } + } + else + { + res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf, new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + Insert key into the tree + + RETURN + -1 Error + 0 Root was not split + 1 Root was split +*/ + +static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key, + uint key_length, int ins_level) +{ + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + int res; + my_off_t new_page; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + int res; + + if ((old_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + return -1; + info->buff_used = 1; + maria_putint(info->buff, 2, 0); + res = maria_rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL); + if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff)) + return 1; + info->s->state.key_root[keynr] = old_root; + return res; + } + + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + old_root, &new_page, ins_level, 0))) + { + case 0: /* root was not split */ + { + break; + } + case 1: /* root was split, grow a new root */ + { + uchar *new_root_buf; + my_off_t new_root; + uchar *new_key; + uint nod_flag = info->s->base.key_reflength; + + if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + + maria_putint(new_root_buf, 2, nod_flag); + if ((new_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + goto err1; + + new_key = new_root_buf + keyinfo->block_length + nod_flag; + + _ma_kpointer(info, new_key - nod_flag, old_root); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, old_root)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + == -1) + goto err1; + _ma_kpointer(info, new_key - nod_flag, new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, new_page)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + == -1) + goto err1; + if (_ma_write_keypage(info, keyinfo, new_root, + DFLT_INIT_HITS, new_root_buf)) + goto err1; + info->s->state.key_root[keynr] = new_root; + + my_afree((byte*)new_root_buf); + break; +err1: + my_afree((byte*)new_root_buf); + return -1; + } + default: + case -1: /* error */ + { + break; + } + } + return res; +} + + +/* + Insert key into the tree - interface function + + RETURN + -1 Error + 0 OK +*/ + +int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +{ + return (!key_length || + (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ? -1 : 0; +} + + +/* + Fill reinsert page buffer + + RETURN + -1 Error + 0 OK +*/ + +static int maria_rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page, + int level) +{ + if (ReinsertList->n_pages == ReinsertList->m_pages) + { + ReinsertList->m_pages += REINSERT_BUFFER_INC; + if (!(ReinsertList->pages = (stPageLevel*)my_realloc((gptr)ReinsertList->pages, + ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR)))) + goto err1; + } + /* save page to ReinsertList */ + ReinsertList->pages[ReinsertList->n_pages].offs = page; + ReinsertList->pages[ReinsertList->n_pages].level = level; + ReinsertList->n_pages++; + return 0; + +err1: + return -1; +} + + +/* + Go down and delete key from the tree + + RETURN + -1 Error + 0 Deleted + 1 Not found + 2 Empty leaf +*/ + +static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t page, uint *page_size, + stPageList *ReinsertList, int level) +{ + uchar *k; + uchar *last; + ulong i; + uint nod_flag; + uchar *page_buf; + int res; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), ++i) + { + if (nod_flag) + { + /* not leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + { + switch ((res = maria_rtree_delete_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), page_size, ReinsertList, level + 1))) + { + case 0: /* deleted */ + { + /* test page filling */ + if (*page_size + key_length >= rt_PAGE_MIN_SIZE(keyinfo->block_length)) + { + /* OK */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + } + else + { + /* too small: delete key & add it descendant to reinsert list */ + if (maria_rtree_fill_reinsert_list(ReinsertList, _ma_kpos(nod_flag, k), + level + 1)) + goto err1; + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_getint(page_buf); + } + + goto ok; + } + case 1: /* not found - continue searching */ + { + break; + } + case 2: /* vacuous case: last key in the leaf */ + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_getint(page_buf); + res = 0; + goto ok; + } + default: /* error */ + case -1: + { + goto err1; + } + } + } + } + else + { + /* leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_EQUAL | MBR_DATA)) + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + *page_size = maria_getint(page_buf); + if (*page_size == 2) + { + /* last key in the leaf */ + res = 2; + if (_ma_dispose(info, keyinfo, page, DFLT_INIT_HITS)) + goto err1; + } + else + { + res = 0; + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + goto ok; + } + } + } + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + Delete key - interface function + + RETURN + -1 Error + 0 Deleted +*/ + +int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +{ + uint page_size; + stPageList ReinsertList; + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + ReinsertList.pages = NULL; + ReinsertList.n_pages = 0; + ReinsertList.m_pages = 0; + + switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root, + &page_size, &ReinsertList, 0)) + { + case 2: + { + info->s->state.key_root[keynr] = HA_OFFSET_ERROR; + return 0; + } + case 0: + { + uint nod_flag; + ulong i; + for (i = 0; i < ReinsertList.n_pages; ++i) + { + uchar *page_buf; + uint nod_flag; + uchar *k; + uchar *last; + + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + goto err1; + } + if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if (maria_rtree_insert_level(info, keynr, k, key_length, + ReinsertList.pages[i].level) == -1) + { + my_afree((byte*)page_buf); + goto err1; + } + } + my_afree((byte*)page_buf); + if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS)) + goto err1; + } + if (ReinsertList.pages) + my_free((byte*) ReinsertList.pages, MYF(0)); + + /* check for redundant root (not leaf, 1 child) and eliminate */ + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + goto err1; + if (!_ma_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, + info->buff, 0)) + goto err1; + nod_flag = _ma_test_if_nod(info->buff); + page_size = maria_getint(info->buff); + if (nod_flag && (page_size == 2 + key_length + nod_flag)) + { + my_off_t new_root = _ma_kpos(nod_flag, + rt_PAGE_FIRST_KEY(info->buff, nod_flag)); + if (_ma_dispose(info, keyinfo, old_root, DFLT_INIT_HITS)) + goto err1; + info->s->state.key_root[keynr] = new_root; + } + info->update= HA_STATE_DELETED; + return 0; + +err1: + return -1; + } + case 1: /* not found */ + { + my_errno = HA_ERR_KEY_NOT_FOUND; + return -1; + } + default: + case -1: /* error */ + { + return -1; + } + } +} + + +/* + Estimate number of suitable keys in the tree + + RETURN + estimated value +*/ + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, + uint key_length, uint flag) +{ + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + my_off_t root; + uint i = 0; + uchar *k; + uchar *last; + uint nod_flag; + uchar *page_buf; + uint k_len; + double area = 0; + ha_rows res = 0; + + if (flag & MBR_DISJOINT) + return info->state->records; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + return HA_POS_ERROR; + if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + return HA_POS_ERROR; + if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), ++i) + { + if (nod_flag) + { + double k_area = maria_rtree_rect_volume(keyinfo->seg, k, key_length); + + /* The following should be safe, even if we compare doubles */ + if (k_area == 0) + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area += 1; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + area += 1; + } + else + goto err1; + } + else + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area += maria_rtree_overlapping_area(keyinfo->seg, key, k, key_length) / + k_area; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + area += maria_rtree_rect_volume(keyinfo->seg, key, key_length) / + k_area; + } + else + goto err1; + } + } + else + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, flag)) + ++res; + } + } + if (nod_flag) + { + if (i) + res = (ha_rows) (area / i * info->state->records); + else + res = HA_POS_ERROR; + } + + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return HA_POS_ERROR; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h new file mode 100644 index 00000000000..ff431d81372 --- /dev/null +++ b/storage/maria/ma_rt_index.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_index_h +#define _rt_index_h + +#ifdef HAVE_RTREE_KEYS + +#define rt_PAGE_FIRST_KEY(page, nod_flag) (page + 2 + nod_flag) +#define rt_PAGE_NEXT_KEY(key, key_length, nod_flag) (key + key_length + \ + (nod_flag ? nod_flag : info->s->base.rec_reflength)) +#define rt_PAGE_END(page) (page + maria_getint(page)) + +#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 3) + +int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key, uint key_length); +int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length); + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_length, + uint search_flag); +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag); + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length); +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length); + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, + uint key_length, uint flag); + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_length, my_off_t *new_page_offs); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_index_h */ diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c new file mode 100644 index 00000000000..2732fefffbe --- /dev/null +++ b/storage/maria/ma_rt_key.c @@ -0,0 +1,100 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +/* + Add key to the page + + RESULT VALUES + -1 Error + 0 Not split + 1 Split +*/ + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, my_off_t *new_page) +{ + uint page_size = maria_getint(page_buf); + uint nod_flag = _ma_test_if_nod(page_buf); + + if (page_size + key_length + info->s->base.rec_reflength <= + keyinfo->block_length) + { + /* split won't be necessary */ + if (nod_flag) + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key - nod_flag, key_length + nod_flag); + page_size += key_length + nod_flag; + } + else + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key, key_length + + info->s->base.rec_reflength); + page_size += key_length + info->s->base.rec_reflength; + } + maria_putint(page_buf, page_size, nod_flag); + return 0; + } + + return (maria_rtree_split_page(info, keyinfo, page_buf, key, key_length, + new_page) ? -1 : 1); +} + +/* + Delete key from the page +*/ +int maria_rtree_delete_key(MARIA_HA *info, uchar *page_buf, uchar *key, + uint key_length, uint nod_flag) +{ + uint16 page_size = maria_getint(page_buf); + uchar *key_start; + + key_start= key - nod_flag; + if (!nod_flag) + key_length += info->s->base.rec_reflength; + + memmove(key_start, key + key_length, page_size - key_length - + (key - page_buf)); + page_size-= key_length + nod_flag; + + maria_putint(page_buf, page_size, nod_flag); + return 0; +} + + +/* + Calculate and store key MBR +*/ + +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t child_page) +{ + if (!_ma_fetch_keypage(info, keyinfo, child_page, + DFLT_INIT_HITS, info->buff, 0)) + return -1; + + return maria_rtree_page_mbr(info, keyinfo->seg, info->buff, key, key_length); +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h new file mode 100644 index 00000000000..448024ed8c5 --- /dev/null +++ b/storage/maria/ma_rt_key.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Ramil Kalimullin, who has a shared copyright to this code */ + +#ifndef _rt_key_h +#define _rt_key_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, uchar *page_buf, my_off_t *new_page); +int maria_rtree_delete_key(MARIA_HA *info, uchar *page, uchar *key, + uint key_length, uint nod_flag); +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_length, my_off_t child_page); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_key_h */ diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c new file mode 100644 index 00000000000..83d3a0a2f1c --- /dev/null +++ b/storage/maria/ma_rt_mbr.c @@ -0,0 +1,801 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_mbr.h" + +#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax)) +#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax)) +#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax)) +#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax)) +#define EQUAL_CMP(amin, amax, bmin, bmax) ((amin != bmin) || (amax != bmax)) + +#define FCMP(A, B) ((int)(A) - (int)(B)) +#define p_inc(A, B, X) {A += X; B += X;} + +#define RT_CMP(nextflag) \ + if (nextflag & MBR_INTERSECT) \ + { \ + if (INTERSECT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_CONTAIN) \ + { \ + if (CONTAIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_WITHIN) \ + { \ + if (WITHIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_EQUAL) \ + { \ + if (EQUAL_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else /* if (nextflag & MBR_DISJOINT) */ \ + { \ + if (DISJOINT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } + +#define RT_CMP_KORR(type, korr_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + RT_CMP(nextflag); \ +} + +#define RT_CMP_GET(type, get_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + RT_CMP(nextflag); \ +} + +/* + Compares two keys a and b depending on nextflag + nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal + MBR_DATA(a,b) Data reference is the same + Returns 0 on success. +*/ +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *b, uchar *a, uint key_length, + uint nextflag) +{ + for (; (int) key_length > 0; keyseg += 2 ) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_CMP_KORR(int8, mi_sint1korr, 1, nextflag); + break; + case HA_KEYTYPE_BINARY: + RT_CMP_KORR(uint8, mi_uint1korr, 1, nextflag); + break; + case HA_KEYTYPE_SHORT_INT: + RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag); + break; + case HA_KEYTYPE_USHORT_INT: + RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag); + break; + case HA_KEYTYPE_INT24: + RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag); + break; + case HA_KEYTYPE_UINT24: + RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag); + break; + case HA_KEYTYPE_LONG_INT: + RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag); + break; + case HA_KEYTYPE_ULONG_INT: + RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag) + break; + case HA_KEYTYPE_ULONGLONG: + RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag) + break; +#endif + case HA_KEYTYPE_FLOAT: + /* The following should be safe, even if we compare doubles */ + RT_CMP_GET(float, mi_float4get, 4, nextflag); + break; + case HA_KEYTYPE_DOUBLE: + RT_CMP_GET(double, mi_float8get, 8, nextflag); + break; + case HA_KEYTYPE_END: + goto end; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + +end: + if (nextflag & MBR_DATA) + { + uchar *end = a + keyseg->length; + do + { + if (*a++ != *b++) + return FCMP(a[-1], b[-1]); + } while (a != end); + } + return 0; +} + +#define RT_VOL_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +#define RT_VOL_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +/* + Calculates rectangle volume +*/ +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar *a, uint key_length) +{ + double res = 1; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_VOL_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_VOL_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_VOL_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_VOL_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_VOL_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_VOL_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_VOL_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_VOL_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_VOL_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_VOL_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return res; +} + +#define RT_D_MBR_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + +#define RT_D_MBR_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + + +/* + Creates an MBR as an array of doubles. +*/ + +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res) +{ + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_D_MBR_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_D_MBR_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_D_MBR_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_D_MBR_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return 0; +} + +#define RT_COMB_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +#define RT_COMB_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +/* + Creates common minimal bounding rectungle + for two input rectagnles a and b + Result is written to c +*/ + +int maria_rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c, + uint key_length) +{ + for ( ; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_COMB_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_COMB_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_COMB_KORR(uint32, mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_COMB_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_COMB_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + c+= keyseg_length; + } + return 0; +} + + +#define RT_OVL_AREA_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +#define RT_OVL_AREA_GET(type, get_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +/* +Calculates overlapping area of two MBRs a & b +*/ +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length) +{ + double res = 1; + for (; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_OVL_AREA_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_OVL_AREA_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_OVL_AREA_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_OVL_AREA_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_OVL_AREA_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_OVL_AREA_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_OVL_AREA_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + return res; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return res; +} + +#define RT_AREA_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_AREA_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* +Calculates MBR_AREA(a+b) - MBR_AREA(a) +*/ +double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length, double *ab_area) +{ + double a_area= 1.0; + double loc_ab_area= 1.0; + + *ab_area= 1.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_AREA_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_AREA_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_AREA_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_AREA_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_AREA_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_AREA_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_AREA_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_AREA_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_AREA_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_AREA_INC_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + goto safe_end; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } +safe_end: + *ab_area= loc_ab_area; + return loc_ab_area - a_area; +} + +#define RT_PERIM_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_PERIM_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* +Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a) +*/ +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length, double *ab_perim) +{ + double a_perim = 0.0; + + *ab_perim= 0.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PERIM_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PERIM_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PERIM_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PERIM_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_PERIM_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PERIM_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PERIM_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PERIM_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PERIM_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PERIM_INC_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + return *ab_perim - a_perim; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return *ab_perim - a_perim; +} + + +#define RT_PAGE_MBR_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(k + inc); \ + amax = korr_func(k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + bmin = korr_func(k + inc); \ + bmax = korr_func(k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +#define RT_PAGE_MBR_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, k + inc); \ + get_func(amax, k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + get_func(bmin, k + inc); \ + get_func(bmax, k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +/* +Calculates key page total MBR = MBR(key1) + MBR(key2) + ... +*/ +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf, + uchar *c, uint key_length) +{ + uint inc = 0; + uint k_len = key_length; + uint nod_flag = _ma_test_if_nod(page_buf); + uchar *k; + uchar *last = rt_PAGE_END(page_buf); + + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + /* Handle NULL part */ + if (keyseg->null_bit) + { + return 1; + } + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PAGE_MBR_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PAGE_MBR_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PAGE_MBR_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PAGE_MBR_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_PAGE_MBR_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PAGE_MBR_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PAGE_MBR_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PAGE_MBR_KORR(uint32, mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PAGE_MBR_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PAGE_MBR_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PAGE_MBR_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PAGE_MBR_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + } + return 0; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h new file mode 100644 index 00000000000..81e2a6851d4 --- /dev/null +++ b/storage/maria/ma_rt_mbr.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_mbr_h +#define _rt_mbr_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, uint key_length, + uint nextflag); +int maria_rtree_combine_rect(HA_KEYSEG *keyseg,uchar *, uchar *, uchar*, + uint key_length); +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar*, uint key_length); +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res); +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length); +double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length, double *ab_area); +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, + uint key_length, double *ab_perim); +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf, + uchar* c, uint key_length); +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_mbr_h */ diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c new file mode 100644 index 00000000000..034799efd89 --- /dev/null +++ b/storage/maria/ma_rt_split.c @@ -0,0 +1,350 @@ +/* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +typedef struct +{ + double square; + int n_node; + uchar *key; + double *coords; +} SplitStruct; + +inline static double *reserve_coords(double **d_buffer, int n_dim) +{ + double *coords = *d_buffer; + (*d_buffer) += n_dim * 2; + return coords; +} + +static void mbr_join(double *a, const double *b, int n_dim) +{ + double *end = a + n_dim * 2; + do + { + if (a[0] > b[0]) + a[0] = b[0]; + + if (a[1] < b[1]) + a[1] = b[1]; + + a += 2; + b += 2; + }while (a != end); +} + +/* +Counts the square of mbr which is a join of a and b +*/ +static double mbr_join_square(const double *a, const double *b, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= + ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]); + + a += 2; + b += 2; + }while (a != end); + + return square; +} + +static double count_square(const double *a, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= a[1] - a[0]; + a += 2; + }while (a != end); + return square; +} + +inline static void copy_coords(double *dst, const double *src, int n_dim) +{ + memcpy(dst, src, sizeof(double) * (n_dim * 2)); +} + +/* +Select two nodes to collect group upon +*/ +static void pick_seeds(SplitStruct *node, int n_entries, + SplitStruct **seed_a, SplitStruct **seed_b, int n_dim) +{ + SplitStruct *cur1; + SplitStruct *lim1 = node + (n_entries - 1); + SplitStruct *cur2; + SplitStruct *lim2 = node + n_entries; + + double max_d = -DBL_MAX; + double d; + + for (cur1 = node; cur1 < lim1; ++cur1) + { + for (cur2=cur1 + 1; cur2 < lim2; ++cur2) + { + + d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square - + cur2->square; + if (d > max_d) + { + max_d = d; + *seed_a = cur1; + *seed_b = cur2; + } + } + } +} + +/* +Select next node and group where to add +*/ +static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2, + SplitStruct **choice, int *n_group, int n_dim) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + + double max_diff = -DBL_MAX; + + for (; curn_node) + { + continue; + } + + diff = mbr_join_square(g1, cur->coords, n_dim) - + mbr_join_square(g2, cur->coords, n_dim); + + abs_diff = fabs(diff); + if (abs_diff > max_diff) + { + max_diff = abs_diff; + *n_group = 1 + (diff > 0); + *choice = cur; + } + } +} + +/* +Mark not-in-group entries as n_group +*/ +static void mark_all_entries(SplitStruct *node, int n_entries, int n_group) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + for (; curn_node) + { + continue; + } + cur->n_node = n_group; + } +} + +static int split_maria_rtree_node(SplitStruct *node, int n_entries, + int all_size, /* Total key's size */ + int key_size, + int min_size, /* Minimal group size */ + int size1, int size2 /* initial group sizes */, + double **d_buffer, int n_dim) +{ + SplitStruct *cur; + SplitStruct *a; + SplitStruct *b; + double *g1 = reserve_coords(d_buffer, n_dim); + double *g2 = reserve_coords(d_buffer, n_dim); + SplitStruct *next; + int next_node; + int i; + SplitStruct *end = node + n_entries; + + if (all_size < min_size * 2) + { + return 1; + } + + cur = node; + for (; cursquare = count_square(cur->coords, n_dim); + cur->n_node = 0; + } + + pick_seeds(node, n_entries, &a, &b, n_dim); + a->n_node = 1; + b->n_node = 2; + + + copy_coords(g1, a->coords, n_dim); + size1 += key_size; + copy_coords(g2, b->coords, n_dim); + size2 += key_size; + + + for (i=n_entries - 2; i>0; --i) + { + if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */ + { + mark_all_entries(node, n_entries, 1); + break; + } + + if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */ + { + mark_all_entries(node, n_entries, 2); + break; + } + + pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim); + if (next_node == 1) + { + size1 += key_size; + mbr_join(g1, next->coords, n_dim); + } + else + { + size2 += key_size; + mbr_join(g2, next->coords, n_dim); + } + next->n_node = next_node; + } + + return 0; +} + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, uchar *key, + uint key_length, my_off_t *new_page_offs) +{ + int n1, n2; /* Number of items in groups */ + + SplitStruct *task; + SplitStruct *cur; + SplitStruct *stop; + double *coord_buf; + double *next_coord; + double *old_coord; + int n_dim; + uchar *source_cur, *cur1, *cur2; + uchar *new_page; + int err_code= 0; + uint nod_flag= _ma_test_if_nod(page); + uint full_length= key_length + (nod_flag ? nod_flag : + info->s->base.rec_reflength); + int max_keys= (maria_getint(page)-2) / (full_length); + + n_dim = keyinfo->keysegs / 2; + + if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) * + (max_keys + 1 + 4) + + sizeof(SplitStruct) * (max_keys + 1)))) + return -1; + + task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4)); + + next_coord = coord_buf; + + stop = task + max_keys; + source_cur = rt_PAGE_FIRST_KEY(page, nod_flag); + + for (cur = task; cur < stop; ++cur, source_cur = rt_PAGE_NEXT_KEY(source_cur, + key_length, nod_flag)) + { + cur->coords = reserve_coords(&next_coord, n_dim); + cur->key = source_cur; + maria_rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords); + } + + cur->coords = reserve_coords(&next_coord, n_dim); + maria_rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); + cur->key = key; + + old_coord = next_coord; + + if (split_maria_rtree_node(task, max_keys + 1, + maria_getint(page) + full_length + 2, full_length, + rt_PAGE_MIN_SIZE(keyinfo->block_length), + 2, 2, &next_coord, n_dim)) + { + err_code = 1; + goto split_err; + } + + if (!(new_page = (uchar*)my_alloca((uint)keyinfo->block_length))) + { + err_code= -1; + goto split_err; + } + + stop = task + (max_keys + 1); + cur1 = rt_PAGE_FIRST_KEY(page, nod_flag); + cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag); + + n1= n2 = 0; + for (cur = task; cur < stop; ++cur) + { + uchar *to; + if (cur->n_node == 1) + { + to = cur1; + cur1 = rt_PAGE_NEXT_KEY(cur1, key_length, nod_flag); + ++n1; + } + else + { + to = cur2; + cur2 = rt_PAGE_NEXT_KEY(cur2, key_length, nod_flag); + ++n2; + } + if (to != cur->key) + memcpy(to - nod_flag, cur->key - nod_flag, full_length); + } + + maria_putint(page, 2 + n1 * full_length, nod_flag); + maria_putint(new_page, 2 + n2 * full_length, nod_flag); + + if ((*new_page_offs= _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + err_code= -1; + else + err_code= _ma_write_keypage(info, keyinfo, *new_page_offs, + DFLT_INIT_HITS, new_page); + + my_afree((byte*)new_page); + +split_err: + my_afree((byte*) coord_buf); + return err_code; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c new file mode 100644 index 00000000000..ca4825c2ce2 --- /dev/null +++ b/storage/maria/ma_rt_test.c @@ -0,0 +1,473 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA rtree table */ +/* Written by Alex Barkov who has a shared copyright to this code */ + + +#include "maria.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" + +#define MAX_REC_LENGTH 1024 +#define ndims 2 +#define KEYALG HA_KEY_ALG_RTREE + +static int read_with_pos(MARIA_HA * file, int silent); +static void create_record(char *record,uint rownr); +static void create_record1(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); +static int run_test(const char *filename); + +static double rt_data[]= +{ + /*1*/ 0,10,0,10, + /*2*/ 5,15,0,10, + /*3*/ 0,10,5,15, + /*4*/ 10,20,10,20, + /*5*/ 0,10,0,10, + /*6*/ 5,15,0,10, + /*7*/ 0,10,5,15, + /*8*/ 10,20,10,20, + /*9*/ 0,10,0,10, + /*10*/ 5,15,0,10, + /*11*/ 0,10,5,15, + /*12*/ 10,20,10,20, + /*13*/ 0,10,0,10, + /*14*/ 5,15,0,10, + /*15*/ 0,10,5,15, + /*16*/ 10,20,10,20, + /*17*/ 5,15,0,10, + /*18*/ 0,10,5,15, + /*19*/ 10,20,10,20, + /*20*/ 0,10,0,10, + + /*1*/ 100,110,0,10, + /*2*/ 105,115,0,10, + /*3*/ 100,110,5,15, + /*4*/ 110,120,10,20, + /*5*/ 100,110,0,10, + /*6*/ 105,115,0,10, + /*7*/ 100,110,5,15, + /*8*/ 110,120,10,20, + /*9*/ 100,110,0,10, + /*10*/ 105,115,0,10, + /*11*/ 100,110,5,15, + /*12*/ 110,120,10,20, + /*13*/ 100,110,0,10, + /*14*/ 105,115,0,10, + /*15*/ 100,110,5,15, + /*16*/ 110,120,10,20, + /*17*/ 105,115,0,10, + /*18*/ 100,110,5,15, + /*19*/ 110,120,10,20, + /*20*/ 100,110,0,10, + -1 +}; + +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("rt_test")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range range; + + int silent=0; + int opt_unique=0; + int create_flag=0; + int key_type=HA_KEYTYPE_DOUBLE; + int key_length=8; + int null_fields=0; + int nrecords=sizeof(rt_data)/(sizeof(double)*4);/* 3000;*/ + int rec_length=0; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd= 10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + rec_length=1; + + /* Define 2*ndims columns for coordinates*/ + + for (i=1; i<=2*ndims ;i++){ + recinfo[i].type=FIELD_NORMAL; + recinfo[i].length=key_length; + rec_length+=key_length; + } + + /* Define a key with 2*ndims segments */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=2*ndims; + keyinfo[0].flag=0; + keyinfo[0].key_alg=KEYALG; + + for (i=0; i<2*ndims; i++){ + keyinfo[0].seg[i].type= key_type; + keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */ + keyinfo[0].seg[i].start= (key_length*i)+1; + keyinfo[0].seg[i].length=key_length; + keyinfo[0].seg[i].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[i].null_pos=0; + keyinfo[0].seg[i].language=default_charset_info->number; + } + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + 1, /* keys */ + keyinfo, + 1+2*ndims+opt_unique, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_record(record, nrecords*4/5); + print_record(record,0," search for\n"); + + if ((error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for (i=0;inextpos=info->s->pack.header_length; /* Read first record */ + info->lastinx= -1; /* Can't forward or backward */ + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); +} + +/* + Read a row based on position. + If filepos= HA_OFFSET_ERROR then read next row + Return values + Returns one of following values: + 0 = Ok. + HA_ERR_END_OF_FILE = EOF. +*/ + +int maria_scan(MARIA_HA *info, byte *buf) +{ + DBUG_ENTER("maria_scan"); + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + DBUG_RETURN ((*info->s->read_rnd)(info,buf,info->nextpos,1)); +} diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c new file mode 100644 index 00000000000..3bb048ea239 --- /dev/null +++ b/storage/maria/ma_search.c @@ -0,0 +1,1894 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* key handling functions */ + +#include "ma_fulltext.h" +#include "m_ctype.h" + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uchar *keypos, + uint *return_key_length); + + /* Check index */ + +int _ma_check_index(MARIA_HA *info, int inx) +{ + if (inx == -1) /* Use last index */ + inx=info->lastinx; + if (inx < 0 || ! maria_is_key_active(info->s->state.key_map, inx)) + { + my_errno=HA_ERR_WRONG_INDEX; + return -1; + } + if (info->lastinx != inx) /* Index changed */ + { + info->lastinx = inx; + info->page_changed=1; + info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) | + HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); + } + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + return(-1); + return(inx); +} /* _ma_check_index */ + + + /* + ** Search after row by a key + ** Position to row is stored in info->lastpos + ** Return: -1 if not found + ** 1 if one should continue search on higher level + */ + +int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uchar *key, uint key_len, uint nextflag, register my_off_t pos) +{ + my_bool last_key; + int error,flag; + uint nod_flag; + uchar *keypos,*maxpos; + uchar lastkey[HA_MAX_KEY_BUFF],*buff; + DBUG_ENTER("_ma_search"); + DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", + (ulong) pos, nextflag, (ulong) info->lastpos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len);); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->lastpos= HA_OFFSET_ERROR; + if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST))) + DBUG_RETURN(-1); /* Not found ; return error */ + DBUG_RETURN(1); /* Search at upper levels */ + } + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + DBUG_DUMP("page",(byte*) buff,maria_getint(buff)); + + flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, + &keypos,lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(buff); + maxpos=buff+maria_getint(buff)-1; + + if (flag) + { + if ((error= _ma_search(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) <= 0) + DBUG_RETURN(error); + + if (flag >0) + { + if (nextflag & (SEARCH_SMALLER | SEARCH_LAST) && + keypos == buff+2+nod_flag) + DBUG_RETURN(1); /* Bigger than key */ + } + else if (nextflag & SEARCH_BIGGER && keypos >= maxpos) + DBUG_RETURN(1); /* Smaller than key */ + } + else + { + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + if ((error= _ma_search(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) >= 0 || + my_errno != HA_ERR_KEY_NOT_FOUND) + DBUG_RETURN(error); + info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */ + } + } + if (pos != info->last_keypage) + { + uchar *old_buff=buff; + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + keypos=buff+(keypos-old_buff); + maxpos=buff+(maxpos-old_buff); + } + + if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0) + { + uint not_used[2]; + if (_ma_get_prev_key(info,keyinfo, buff, info->lastkey, keypos, + &info->lastkey_length)) + goto err; + if (!(nextflag & SEARCH_SMALLER) && + ha_key_cmp(keyinfo->seg, info->lastkey, key, key_len, SEARCH_FIND, + not_used)) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + goto err; + } + } + else + { + info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey); + if (!info->lastkey_length) + goto err; + memcpy(info->lastkey,lastkey,info->lastkey_length); + } + info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + /* Save position for a possible read next / previous */ + info->int_keypos=info->buff+ (keypos-buff); + info->int_maxpos=info->buff+ (maxpos-buff); + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=0; + info->buff_used= (info->buff != buff); /* If we have to reread buff */ + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + DBUG_RETURN(0); + +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + info->lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + DBUG_RETURN (-1); +} /* _ma_search */ + + + /* Search after key in page-block */ + /* If packed key puts smaller or identical key in buff */ + /* ret_pos point to where find or bigger key starts */ + /* ARGSUSED */ + +int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, uchar **ret_pos, + uchar *buff __attribute__((unused)), my_bool *last_key) +{ + reg4 int start,mid,end,save_end; + int flag; + uint totlength,nod_flag,not_used[2]; + DBUG_ENTER("_ma_bin_search"); + + LINT_INIT(flag); + totlength=keyinfo->keylength+(nod_flag=_ma_test_if_nod(page)); + start=0; mid=1; + save_end=end=(int) ((maria_getint(page)-2-nod_flag)/totlength-1); + DBUG_PRINT("test",("maria_getint: %d end: %d",maria_getint(page),end)); + page+=2+nod_flag; + + while (start != end) + { + mid= (start+end)/2; + if ((flag=ha_key_cmp(keyinfo->seg,page+(uint) mid*totlength,key,key_len, + comp_flag, not_used)) + >= 0) + end=mid; + else + start=mid+1; + } + if (mid != start) + flag=ha_key_cmp(keyinfo->seg,page+(uint) start*totlength,key,key_len, + comp_flag, not_used); + if (flag < 0) + start++; /* point at next, bigger key */ + *ret_pos=page+(uint) start*totlength; + *last_key= end == save_end; + DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start)); + DBUG_RETURN(flag); +} /* _ma_bin_search */ + + +/* + Locate a packed key in a key page. + + SYNOPSIS + _ma_seq_search() + info Open table information. + keyinfo Key definition information. + page Key page (beginning). + key Search key. + key_len Length to use from search key or USE_WHOLE_KEY + comp_flag Search flags like SEARCH_SAME etc. + ret_pos RETURN Position in key page behind this key. + buff RETURN Copy of previous or identical unpacked key. + last_key RETURN If key is last in page. + + DESCRIPTION + Used instead of _ma_bin_search() when key is packed. + Puts smaller or identical key in buff. + Key is searched sequentially. + + RETURN + > 0 Key in 'buff' is smaller than search key. + 0 Key in 'buff' is identical to search key. + < 0 Not found. +*/ + +int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, uchar **ret_pos, + uchar *buff, my_bool *last_key) +{ + int flag; + uint nod_flag,length,not_used[2]; + uchar t_buff[HA_MAX_KEY_BUFF],*end; + DBUG_ENTER("_ma_seq_search"); + + LINT_INIT(flag); LINT_INIT(length); + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + t_buff[0]=0; /* Avoid bugs */ + while (page < end) + { + length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff); + if (length == 0 || page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error",("Found wrong key: length: %u page: %lx end: %lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag, + not_used)) >= 0) + break; +#ifdef EXTRA_DEBUG + DBUG_PRINT("loop",("page: %lx key: '%s' flag: %d", (long) page, t_buff, + flag)); +#endif + memcpy(buff,t_buff,length); + *ret_pos=page; + } + if (flag == 0) + memcpy(buff,t_buff,length); /* Result is first key */ + *last_key= page == end; + DBUG_PRINT("exit",("flag: %d ret_pos: %lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_seq_search */ + + +int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint nextflag, uchar **ret_pos, + uchar *buff, my_bool *last_key) +{ + /* + my_flag is raw comparison result to be changed according to + SEARCH_NO_FIND,SEARCH_LAST and HA_REVERSE_SORT flags. + flag is the value returned by ha_key_cmp and as treated as final + */ + int flag=0, my_flag=-1; + uint nod_flag, length, len, matched, cmplen, kseg_len; + uint prefix_len,suffix_len; + int key_len_skip, seg_len_pack, key_len_left; + uchar *end, *kseg, *vseg; + uchar *sort_order=keyinfo->seg->charset->sort_order; + uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + uchar *saved_from, *saved_to, *saved_vseg; + uint saved_length=0, saved_prefix_len=0; + uint length_pack; + DBUG_ENTER("_ma_prefix_search"); + + LINT_INIT(length); + LINT_INIT(prefix_len); + LINT_INIT(seg_len_pack); + LINT_INIT(saved_from); + LINT_INIT(saved_to); + LINT_INIT(saved_vseg); + + t_buff[0]=0; /* Avoid bugs */ + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + kseg=key; + + get_key_pack_length(kseg_len,length_pack,kseg); + key_len_skip=length_pack+kseg_len; + key_len_left=(int) key_len- (int) key_len_skip; + /* If key_len is 0, then lenght_pack is 1, then key_len_left is -1. */ + cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack; + DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); + + /* + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + (prefix) length The high bit is set if this is a prefix for the prev key. + [suffix length] Packed length of suffix if the previous was a prefix. + (suffix) data Key data bytes (past the common prefix or whole segment). + [next-key-seg] Next key segments (([packed length], data), ...) + pointer Reference to the data file (last_keyseg->length). + */ + + matched=0; /* how many char's from prefix were alredy matched */ + len=0; /* length of previous key unpacked */ + + while (page < end) + { + uint packed= *page & 128; + + vseg=page; + if (keyinfo->seg->length >= 127) + { + suffix_len=mi_uint2korr(vseg) & 32767; + vseg+=2; + } + else + suffix_len= *vseg++ & 127; + + if (packed) + { + if (suffix_len == 0) + { + /* == 0x80 or 0x8000, same key, prefix length == old key length. */ + prefix_len=len; + } + else + { + /* > 0x80 or 0x8000, this is prefix lgt, packed suffix lgt follows. */ + prefix_len=suffix_len; + get_key_length(suffix_len,vseg); + } + } + else + { + /* Not packed. No prefix used from last key. */ + prefix_len=0; + } + + len=prefix_len+suffix_len; + seg_len_pack=get_pack_length(len); + t_buff=tt_buff+3-seg_len_pack; + store_key_length(t_buff,len); + + if (prefix_len > saved_prefix_len) + memcpy(t_buff+seg_len_pack+saved_prefix_len,saved_vseg, + prefix_len-saved_prefix_len); + saved_vseg=vseg; + saved_prefix_len=prefix_len; + + DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, + suffix_len,vseg)); + { + uchar *from=vseg+suffix_len; + HA_KEYSEG *keyseg; + uint l; + + for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) + { + + if (keyseg->flag & HA_NULL_PART) + { + if (!(*from++)) + continue; + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + get_key_length(l,from); + } + else + l=keyseg->length; + + from+=l; + } + from+=keyseg->length; + page=from+nod_flag; + length=from-vseg; + } + + if (page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error",("Found wrong key: length: %u page: %lx end: %lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + + if (matched >= prefix_len) + { + /* We have to compare. But we can still skip part of the key */ + uint left; + uchar *k=kseg+prefix_len; + + /* + If prefix_len > cmplen then we are in the end-space comparison + phase. Do not try to acces the key any more ==> left= 0. + */ + left= ((len <= cmplen) ? suffix_len : + ((prefix_len < cmplen) ? cmplen - prefix_len : 0)); + + matched=prefix_len+left; + + if (sort_order) + { + for (my_flag=0;left;left--) + if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) + break; + } + else + { + for (my_flag=0;left;left--) + if ((my_flag= (int) *vseg++ - (int) *k++)) + break; + } + + if (my_flag>0) /* mismatch */ + break; + if (my_flag==0) /* match */ + { + /* + ** len cmplen seg_left_len more_segs + ** < matched=len; continue search + ** > = prefix ? found : (matched=len; continue search) + ** > < - ok, found + ** = < - ok, found + ** = = - ok, found + ** = = + next seg + */ + if (len < cmplen) + { + if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT2)) + my_flag= -1; + else + { + /* We have to compare k and vseg as if they were space extended */ + uchar *end= k+ (cmplen - len); + for ( ; k < end && *k == ' '; k++) ; + if (k == end) + goto cmp_rest; /* should never happen */ + if (*k < (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + } + else if (len > cmplen) + { + uchar *end; + if ((nextflag & SEARCH_PREFIX) && key_len_left == 0) + goto fix_flag; + + /* We have to compare k and vseg as if they were space extended */ + for (end=vseg + (len-cmplen) ; + vseg < end && *vseg == (uchar) ' '; + vseg++, matched++) ; + DBUG_ASSERT(vseg < end); + + if (*vseg > (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + else + { + cmp_rest: + if (key_len_left>0) + { + uint not_used[2]; + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, + k, key_len_left, nextflag, not_used)) >= 0) + break; + } + else + { + /* + at this line flag==-1 if the following lines were already + visited and 0 otherwise, i.e. flag <=0 here always !!! + */ + fix_flag: + DBUG_ASSERT(flag <= 0); + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) + flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; + if (flag>=0) + break; + } + } + } + matched-=left; + } + /* else (matched < prefix_len) ---> do nothing. */ + + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to=buff+saved_length; + saved_from=saved_vseg; + saved_length=length; + *ret_pos=page; + } + if (my_flag) + flag=(keyinfo->seg->flag & HA_REVERSE_SORT) ? -my_flag : my_flag; + if (flag == 0) + { + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to=buff+saved_length; + saved_from=saved_vseg; + saved_length=length; + } + if (saved_length) + memcpy(saved_to,saved_from,saved_length); + + *last_key= page == end; + + DBUG_PRINT("exit",("flag: %d ret_pos: %lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_prefix_search */ + + + /* Get pos to a key_block */ + +my_off_t _ma_kpos(uint nod_flag, uchar *after_key) +{ + after_key-=nod_flag; + switch (nod_flag) { +#if SIZEOF_OFF_T > 4 + case 7: + return mi_uint7korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 6: + return mi_uint6korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 5: + return mi_uint5korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; +#else + case 7: + after_key++; + case 6: + after_key++; + case 5: + after_key++; +#endif + case 4: + return ((my_off_t) mi_uint4korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 3: + return ((my_off_t) mi_uint3korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 2: + return (my_off_t) (mi_uint2korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH); + case 1: + return (uint) (*after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 0: /* At leaf page */ + default: /* Impossible */ + return(HA_OFFSET_ERROR); + } +} /* _kpos */ + + + /* Save pos to a key_block */ + +void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos) +{ + pos/=MARIA_MIN_KEY_BLOCK_LENGTH; + switch (info->s->base.key_reflength) { +#if SIZEOF_OFF_T > 4 + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + case 1: buff[0]= (uchar) pos; break; + default: abort(); /* impossible */ + } +} /* _ma_kpointer */ + + + /* Calc pos to a data-record from a key */ + + +my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, uchar *after_key) +{ + my_off_t pos; + after_key-=(nod_flag + info->s->rec_reflength); + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: pos= (my_off_t) mi_uint8korr(after_key); break; + case 7: pos= (my_off_t) mi_uint7korr(after_key); break; + case 6: pos= (my_off_t) mi_uint6korr(after_key); break; + case 5: pos= (my_off_t) mi_uint5korr(after_key); break; +#else + case 8: pos= (my_off_t) mi_uint4korr(after_key+4); break; + case 7: pos= (my_off_t) mi_uint4korr(after_key+3); break; + case 6: pos= (my_off_t) mi_uint4korr(after_key+2); break; + case 5: pos= (my_off_t) mi_uint4korr(after_key+1); break; +#endif + case 4: pos= (my_off_t) mi_uint4korr(after_key); break; + case 3: pos= (my_off_t) mi_uint3korr(after_key); break; + case 2: pos= (my_off_t) mi_uint2korr(after_key); break; + default: + pos=0L; /* Shut compiler up */ + } + return (info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? pos : + pos*info->s->base.pack_reclength; +} + + +/* Calc position from a record pointer ( in delete link chain ) */ + +my_off_t _ma_rec_pos(MARIA_SHARE *s, uchar *ptr) +{ + my_off_t pos; + switch (s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: + pos= (my_off_t) mi_uint8korr(ptr); + if (pos == HA_OFFSET_ERROR) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 7: + pos= (my_off_t) mi_uint7korr(ptr); + if (pos == (((my_off_t) 1) << 56) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 6: + pos= (my_off_t) mi_uint6korr(ptr); + if (pos == (((my_off_t) 1) << 48) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 5: + pos= (my_off_t) mi_uint5korr(ptr); + if (pos == (((my_off_t) 1) << 40) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; +#else + case 8: + case 7: + case 6: + case 5: + ptr+= (s->rec_reflength-4); + /* fall through */ +#endif + case 4: + pos= (my_off_t) mi_uint4korr(ptr); + if (pos == (my_off_t) (uint32) ~0L) + return HA_OFFSET_ERROR; + break; + case 3: + pos= (my_off_t) mi_uint3korr(ptr); + if (pos == (my_off_t) (1 << 24) -1) + return HA_OFFSET_ERROR; + break; + case 2: + pos= (my_off_t) mi_uint2korr(ptr); + if (pos == (my_off_t) (1 << 16) -1) + return HA_OFFSET_ERROR; + break; + default: abort(); /* Impossible */ + } + return ((s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? pos : + pos*s->base.pack_reclength); +} + + + /* save position to record */ + +void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos) +{ + if (!(info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) && + pos != HA_OFFSET_ERROR) + pos/=info->s->base.pack_reclength; + + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: mi_int8store(buff,pos); break; + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 8: *buff++=0; + /* fall trough */ + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + default: abort(); /* Impossible */ + } +} /* _ma_dpointer */ + + + /* Get key from key-block */ + /* page points at previous key; its advanced to point at next key */ + /* key should contain previous key */ + /* Returns length of found key + pointers */ + /* nod_flag is a flag if we are on nod */ + + /* same as _ma_get_key but used with fixed length keys */ + +uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register uchar **page, register uchar *key) +{ + memcpy((byte*) key,(byte*) *page, + (size_t) (keyinfo->keylength+nod_flag)); + *page+=keyinfo->keylength+nod_flag; + return(keyinfo->keylength); +} /* _ma_get_static_key */ + + +/* + get key witch is packed against previous key or key with a NULL column. + + SYNOPSIS + _ma_get_pack_key() + keyinfo key definition information. + nod_flag If nod: Length of node pointer, else zero. + page_pos RETURN position in key page behind this key. + key IN/OUT in: prev key, out: unpacked key. + + RETURN + key_length + length of data pointer +*/ + +uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register uchar **page_pos, register uchar *key) +{ + reg1 HA_KEYSEG *keyseg; + uchar *start_key,*page=*page_pos; + uint length; + + start_key=key; + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_PACK_KEY) + { + /* key with length, packed to previous key */ + uchar *start=key; + uint packed= *page & 128,tot_length,rest_length; + if (keyseg->length >= 127) + { + length=mi_uint2korr(page) & 32767; + page+=2; + } + else + length= *page++ & 127; + + if (packed) + { + if (length > (uint) keyseg->length) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + if (length == 0) /* Same key */ + { + if (keyseg->flag & HA_NULL_PART) + *key++=1; /* Can't be NULL */ + get_key_length(length,key); + key+= length; /* Same diff_key as prev */ + if (length > keyseg->length) + { + DBUG_PRINT("error", + ("Found too long null packed key: %u of %u at %lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; + } + continue; + } + if (keyseg->flag & HA_NULL_PART) + { + key++; /* Skip null marker*/ + start++; + } + + get_key_length(rest_length,page); + tot_length=rest_length+length; + + /* If the stored length has changed, we must move the key */ + if (tot_length >= 255 && *start != 255) + { + /* length prefix changed from a length of one to a length of 3 */ + bmove_upp((char*) key+length+3,(char*) key+length+1,length); + *key=255; + mi_int2store(key+1,tot_length); + key+=3+length; + } + else if (tot_length < 255 && *start == 255) + { + bmove(key+1,key+3,length); + *key=tot_length; + key+=1+length; + } + else + { + store_key_length_inc(key,tot_length); + key+=length; + } + memcpy(key,page,rest_length); + page+=rest_length; + key+=rest_length; + continue; + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!length--) /* Null part */ + { + *key++=0; + continue; + } + *key++=1; /* Not null */ + } + } + if (length > (uint) keyseg->length) + { + DBUG_PRINT("error",("Found too long packed key: %u of %u at %lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + store_key_length_inc(key,length); + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!(*key++ = *page++)) + continue; + } + if (keyseg->flag & + (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + uchar *tmp=page; + get_key_length(length,tmp); + length+=(uint) (tmp-page); + } + else + length=keyseg->length; + } + memcpy((byte*) key,(byte*) page,(size_t) length); + key+=length; + page+=length; + } + length=keyseg->length+nod_flag; + bmove((byte*) key,(byte*) page,length); + *page_pos= page+length; + return ((uint) (key-start_key)+keyseg->length); +} /* _ma_get_pack_key */ + + + +/* key that is packed relatively to previous */ + +uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register uchar **page_pos, register uchar *key) +{ + reg1 HA_KEYSEG *keyseg; + uchar *start_key,*page,*page_end,*from,*from_end; + uint length,tmp; + DBUG_ENTER("_ma_get_binary_pack_key"); + + page= *page_pos; + page_end=page+HA_MAX_KEY_BUFF+1; + start_key=key; + + /* + Keys are compressed the following way: + + prefix length Packed length of prefix for the prev key. (1 or 3 bytes) + for each key segment: + [is null] Null indicator if can be null (1 byte, zero means null) + [length] Packed length if varlength (1 or 3 bytes) + pointer Reference to the data file (last_keyseg->length). + */ + get_key_length(length,page); + if (length) + { + if (length > keyinfo->maxlength) + { + DBUG_PRINT("error",("Found too long binary packed key: %u of %u at %lx", + length, keyinfo->maxlength, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Wrong key */ + } + from=key; from_end=key+length; + } + else + { + from=page; from_end=page_end; /* Not packed key */ + } + + /* + The trouble is that key is split in two parts: + The first part is in from ...from_end-1. + The second part starts at page + */ + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + { + if (from == from_end) { from=page; from_end=page_end; } + if (!(*key++ = *from++)) + continue; /* Null part */ + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + /* Get length of dynamic length key part */ + if (from == from_end) { from=page; from_end=page_end; } + if ((length= (*key++ = *from++)) == 255) + { + if (from == from_end) { from=page; from_end=page_end; } + length= (uint) ((*key++ = *from++)) << 8; + if (from == from_end) { from=page; from_end=page_end; } + length+= (uint) ((*key++ = *from++)); + } + } + else + length=keyseg->length; + + if ((tmp=(uint) (from_end-from)) <= length) + { + key+=tmp; /* Use old key */ + length-=tmp; + from=page; from_end=page_end; + } + DBUG_PRINT("info",("key: %lx from: %lx length: %u", + (long) key, (long) from, length)); + memmove((byte*) key, (byte*) from, (size_t) length); + key+=length; + from+=length; + } + length=keyseg->length+nod_flag; + if ((tmp=(uint) (from_end-from)) <= length) + { + memcpy(key+tmp,page,length-tmp); /* Get last part of key */ + *page_pos= page+length-tmp; + } + else + { + if (from_end != page_end) + { + DBUG_PRINT("error",("Error when unpacking key")); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Error */ + } + memcpy((byte*) key,(byte*) from,(size_t) length); + *page_pos= from+length; + } + DBUG_RETURN((uint) (key-start_key)+keyseg->length); +} + + + /* Get key at position without knowledge of previous key */ + /* Returns pointer to next key */ + +uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uchar *keypos, uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_key"); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + bmove((byte*) key,(byte*) keypos,keyinfo->keylength+nod_flag); + DBUG_RETURN(keypos+keyinfo->keylength+nod_flag); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page <= keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("page: %lx length: %u", (long) page, + *return_key_length)); + DBUG_RETURN(page); +} /* _ma_get_key */ + + + /* Get key at position without knowledge of previous key */ + /* Returns 0 if ok */ + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uchar *keypos, + uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_prev_key"); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *return_key_length=keyinfo->keylength; + bmove((byte*) key,(byte*) keypos- *return_key_length-nod_flag, + *return_key_length); + DBUG_RETURN(0); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page < keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} /* _ma_get_key */ + + + + /* Get last key from key-page */ + /* Return pointer to where key starts */ + +uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *lastkey, uchar *endpos, uint *return_key_length) +{ + uint nod_flag; + uchar *lastpos; + DBUG_ENTER("_ma_get_last_key"); + DBUG_PRINT("enter",("page: %lx endpos: %lx", (long) page, (long) endpos)); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + lastpos=endpos-keyinfo->keylength-nod_flag; + *return_key_length=keyinfo->keylength; + if (lastpos > page) + bmove((byte*) lastkey,(byte*) lastpos,keyinfo->keylength+nod_flag); + } + else + { + lastpos=(page+=2+nod_flag); + lastkey[0]=0; + while (page < endpos) + { + lastpos=page; + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey); + if (*return_key_length == 0) + { + DBUG_PRINT("error",("Couldn't find last key: page: %lx", + (long) page)); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("lastpos: %lx length: %u", (long) lastpos, + *return_key_length)); + DBUG_RETURN(lastpos); +} /* _ma_get_last_key */ + + + /* Calculate length of key */ + +uint _ma_keylength(MARIA_KEYDEF *keyinfo, register uchar *key) +{ + reg1 HA_KEYSEG *keyseg; + uchar *start; + + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + return (keyinfo->keylength); + + start=key; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return((uint) (key-start)+keyseg->length); +} /* _ma_keylength */ + + +/* + Calculate length of part key. + + Used in maria_rkey() to find the key found for the key-part that was used. + This is needed in case of multi-byte character sets where we may search + after '0xDF' but find 'ss' +*/ + +uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register uchar *key, + HA_KEYSEG *end) +{ + reg1 HA_KEYSEG *keyseg; + uchar *start= key; + + for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return (uint) (key-start); +} + + /* Move a key */ + +uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, uchar *from) +{ + reg1 uint length; + memcpy((byte*) to, (byte*) from, + (size_t) (length= _ma_keylength(keyinfo,from))); + return to+length; +} + + /* Find next/previous record with same key */ + /* This can't be used when database is touched after last read */ + +int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, uint nextflag, my_off_t pos) +{ + int error; + uint nod_flag; + uchar lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_search_next"); + DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu", + nextflag, (ulong) info->lastpos, + (ulong) info->int_keypos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length);); + + /* Force full read if we are at last key or if we are not on a leaf + and the key tree has changed since we used it last time + Note that even if the key tree has changed since last read, we can use + the last read data from the leaf if we haven't used the buffer for + something else. + */ + + if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) || + info->page_changed || + (info->int_keytree_version != keyinfo->version && + (info->int_nod_flag || info->buff_used))) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + + if (info->buff_used) + { + if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage, + DFLT_INIT_HITS,info->buff,0)) + DBUG_RETURN(-1); + info->buff_used=0; + } + + /* Last used buffer is in info->buff */ + nod_flag=_ma_test_if_nod(info->buff); + + if (nextflag & SEARCH_BIGGER) /* Next key */ + { + my_off_t tmp_pos= _ma_kpos(nod_flag,info->int_keypos); + if (tmp_pos != HA_OFFSET_ERROR) + { + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, tmp_pos)) <=0) + DBUG_RETURN(error); + } + memcpy(lastkey,key,key_length); + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag, + &info->int_keypos,lastkey))) + DBUG_RETURN(-1); + } + else /* Previous key */ + { + uint length; + /* Find start of previous key */ + info->int_keypos= _ma_get_last_key(info,keyinfo,info->buff,lastkey, + info->int_keypos, &length); + if (!info->int_keypos) + DBUG_RETURN(-1); + if (info->int_keypos == info->buff+2) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, + _ma_kpos(nod_flag,info->int_keypos))) <= 0) + DBUG_RETURN(error); + + /* QQ: We should be able to optimize away the following call */ + if (! _ma_get_last_key(info,keyinfo,info->buff,lastkey, + info->int_keypos,&info->lastkey_length)) + DBUG_RETURN(-1); + } + memcpy(info->lastkey,lastkey,info->lastkey_length); + info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + DBUG_RETURN(0); +} /* _ma_search_next */ + + + /* Search after position for the first row in an index */ + /* This is stored in info->lastpos */ + +int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + uchar *page; + DBUG_ENTER("_ma_search_first"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; + info->lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,0)) + { + info->lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(info->buff); + page=info->buff+2+nod_flag; + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, + info->lastkey))) + DBUG_RETURN(-1); /* Crashed */ + + info->int_keypos=page; info->int_maxpos=info->buff+maria_getint(info->buff)-1; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->buff_used=0; + info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + + DBUG_PRINT("exit",("found key at %lu", (ulong) info->lastpos)); + DBUG_RETURN(0); +} /* _ma_search_first */ + + + /* Search after position for the last row in an index */ + /* This is stored in info->lastpos */ + +int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + uchar *buff,*page; + DBUG_ENTER("_ma_search_last"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + buff=info->buff; + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0)) + { + info->lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + page= buff+maria_getint(buff); + nod_flag=_ma_test_if_nod(buff); + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!_ma_get_last_key(info,keyinfo,buff,info->lastkey,page, + &info->lastkey_length)) + DBUG_RETURN(-1); + info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->int_keypos=info->int_maxpos=page; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->buff_used=0; + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + DBUG_RETURN(0); +} /* _ma_search_last */ + + + +/**************************************************************************** +** +** Functions to store and pack a key in a page +** +** maria_calc_xx_key_length takes the following arguments: +** nod_flag If nod: Length of nod-pointer +** next_key Position to pos after the new key in buffer +** org_key Key that was before the next key in buffer +** prev_key Last key before current key +** key Key that will be stored +** s_temp Information how next key will be packed +****************************************************************************/ + +/* Static length key */ + +int +_ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + uchar *next_pos __attribute__((unused)), + uchar *org_key __attribute__((unused)), + uchar *prev_key __attribute__((unused)), + uchar *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key=key; + return (int) (s_temp->totlength=keyinfo->keylength+nod_flag); +} + +/* Variable length key */ + +int +_ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + uchar *next_pos __attribute__((unused)), + uchar *org_key __attribute__((unused)), + uchar *prev_key __attribute__((unused)), + uchar *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key=key; + return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag); +} + +/* + length of key with a variable length first segment which is prefix + compressed (mariachk reports 'packed + stripped') + + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + prefix byte(s) The high bit is set if this is a prefix for the prev key + length Packed length if the previous was a prefix byte + [length] data bytes ('length' bytes) + next-key-seg Next key segments + + If the first segment can have NULL: + The length is 0 for NULLS and 1+length for not null columns. + +*/ + +int +_ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar *next_key, + uchar *org_key, uchar *prev_key, uchar *key, + MARIA_KEY_PARAM *s_temp) +{ + reg1 HA_KEYSEG *keyseg; + int length; + uint key_length,ref_length,org_key_length=0, + length_pack,new_key_length,diff_flag,pack_marker; + uchar *start,*end,*key_end,*sort_order; + bool same_length; + + length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0; + same_length=0; keyseg=keyinfo->seg; + key_length= _ma_keylength(keyinfo,key)+nod_flag; + + sort_order=0; + if ((keyinfo->flag & HA_FULLTEXT) && + ((keyseg->type == HA_KEYTYPE_TEXT) || + (keyseg->type == HA_KEYTYPE_VARTEXT1) || + (keyseg->type == HA_KEYTYPE_VARTEXT2)) && + !use_strnxfrm(keyseg->charset)) + sort_order=keyseg->charset->sort_order; + + /* diff flag contains how many bytes is needed to pack key */ + if (keyseg->length >= 127) + { + diff_flag=2; + pack_marker=32768; + } + else + { + diff_flag= 1; + pack_marker=128; + } + s_temp->pack_marker=pack_marker; + + /* Handle the case that the first part have NULL values */ + if (keyseg->flag & HA_NULL_PART) + { + if (!*key++) + { + s_temp->key=key; + s_temp->ref_length=s_temp->key_length=0; + s_temp->totlength=key_length-1+diff_flag; + s_temp->next_key_pos=0; /* No next key */ + return (s_temp->totlength); + } + s_temp->store_not_null=1; + key_length--; /* We don't store NULL */ + if (prev_key && !*prev_key++) + org_key=prev_key=0; /* Can't pack against prev */ + else if (org_key) + org_key++; /* Skip NULL */ + } + else + s_temp->store_not_null=0; + s_temp->prev_key=org_key; + + /* The key part will start with a packed length */ + + get_key_pack_length(new_key_length,length_pack,key); + end=key_end= key+ new_key_length; + start=key; + + /* Calc how many characters are identical between this and the prev. key */ + if (prev_key) + { + get_key_length(org_key_length,prev_key); + s_temp->prev_key=prev_key; /* Pointer at data */ + /* Don't use key-pack if length == 0 */ + if (new_key_length && new_key_length == org_key_length) + same_length=1; + else if (new_key_length > org_key_length) + end=key + org_key_length; + + if (sort_order) /* SerG */ + { + while (key < end && sort_order[*key] == sort_order[*prev_key]) + { + key++; prev_key++; + } + } + else + { + while (key < end && *key == *prev_key) + { + key++; prev_key++; + } + } + } + + s_temp->key=key; + s_temp->key_length= (uint) (key_end-key); + + if (same_length && key == key_end) + { + /* identical variable length key */ + s_temp->ref_length= pack_marker; + length=(int) key_length-(int) (key_end-start)-length_pack; + length+= diff_flag; + if (next_key) + { /* Can't combine with next */ + s_temp->n_length= *next_key; /* Needed by _ma_store_key */ + next_key=0; + } + } + else + { + if (start != key) + { /* Starts as prev key */ + ref_length= (uint) (key-start); + s_temp->ref_length= ref_length + pack_marker; + length= (int) (key_length - ref_length); + + length-= length_pack; + length+= diff_flag; + length+= ((new_key_length-ref_length) >= 255) ? 3 : 1;/* Rest_of_key */ + } + else + { + s_temp->key_length+=s_temp->store_not_null; /* If null */ + length= key_length - length_pack+ diff_flag; + } + } + s_temp->totlength=(uint) length; + s_temp->prev_length=0; + DBUG_PRINT("test",("tot_length: %u length: %d uniq_key_length: %u", + key_length, length, s_temp->key_length)); + + /* If something after that hasn't length=0, test if we can combine */ + if ((s_temp->next_key_pos=next_key)) + { + uint packed,n_length; + + packed = *next_key & 128; + if (diff_flag == 2) + { + n_length= mi_uint2korr(next_key) & 32767; /* Length of next key */ + next_key+=2; + } + else + n_length= *next_key++ & 127; + if (!packed) + n_length-= s_temp->store_not_null; + + if (n_length || packed) /* Don't pack 0 length keys */ + { + uint next_length_pack, new_ref_length=s_temp->ref_length; + + if (packed) + { + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key) + { + get_key_length(org_key_length,org_key); + key=start; + if (sort_order) /* SerG */ + { + while (key < end && sort_order[*key] == sort_order[*org_key]) + { + key++; org_key++; + } + } + else + { + while (key < end && *key == *org_key) + { + key++; org_key++; + } + } + if ((new_ref_length= (uint) (key - start))) + new_ref_length+=pack_marker; + } + + if (!n_length) + { + /* + We put a different key between two identical variable length keys + Extend next key to have same prefix as this key + */ + if (new_ref_length) /* prefix of previus key */ + { /* make next key longer */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= org_key_length - + (new_ref_length-pack_marker); + s_temp->n_ref_length= s_temp->part_of_prev_key; + s_temp->n_length= s_temp->prev_length; + n_length= get_pack_length(s_temp->prev_length); + s_temp->prev_key+= (new_ref_length - pack_marker); + length+= s_temp->prev_length + n_length; + } + else + { /* Can't use prev key */ + s_temp->part_of_prev_key=0; + s_temp->prev_length= org_key_length; + s_temp->n_ref_length=s_temp->n_length= org_key_length; + length+= org_key_length; + /* +get_pack_length(org_key_length); */ + } + return (int) length; + } + + ref_length=n_length; + get_key_pack_length(n_length,next_length_pack,next_key); + + /* Test if new keys has fewer characters that match the previous key */ + if (!new_ref_length) + { /* Can't use prev key */ + s_temp->part_of_prev_key= 0; + s_temp->prev_length= ref_length; + s_temp->n_ref_length= s_temp->n_length= n_length+ref_length; + /* s_temp->prev_key+= get_pack_length(org_key_length); */ + return (int) length+ref_length-next_length_pack; + } + if (ref_length+pack_marker > new_ref_length) + { + uint new_pack_length=new_ref_length-pack_marker; + /* We must copy characters from the original key to the next key */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= ref_length - new_pack_length; + s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length; + s_temp->prev_key+= new_pack_length; +/* +get_pack_length(org_key_length); */ + length= length-get_pack_length(ref_length)+ + get_pack_length(new_pack_length); + return (int) length + s_temp->prev_length; + } + } + else + { + /* Next key wasn't a prefix of previous key */ + ref_length=0; + next_length_pack=0; + } + DBUG_PRINT("test",("length: %d next_key: %lx", length, + (long) next_key)); + + { + uint tmp_length; + key=(start+=ref_length); + if (key+n_length < key_end) /* Normalize length based */ + key_end=key+n_length; + if (sort_order) /* SerG */ + { + while (key < key_end && sort_order[*key] == + sort_order[*next_key]) + { + key++; next_key++; + } + } + else + { + while (key < key_end && *key == *next_key) + { + key++; next_key++; + } + } + if (!(tmp_length=(uint) (key-start))) + { /* Key can't be re-packed */ + s_temp->next_key_pos=0; + return length; + } + ref_length+=tmp_length; + n_length-=tmp_length; + length-=tmp_length+next_length_pack; /* We gained these chars */ + } + if (n_length == 0 && ref_length == new_key_length) + { + s_temp->n_ref_length=pack_marker; /* Same as prev key */ + } + else + { + s_temp->n_ref_length=ref_length | pack_marker; + length+= get_pack_length(n_length); + s_temp->n_length=n_length; + } + } + } + return length; +} + + +/* Length of key which is prefix compressed */ + +int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar *next_key, + uchar *org_key, uchar *prev_key, uchar *key, + MARIA_KEY_PARAM *s_temp) +{ + uint length,key_length,ref_length; + + s_temp->totlength=key_length= _ma_keylength(keyinfo,key)+nod_flag; +#ifdef HAVE_purify + s_temp->n_length= s_temp->n_ref_length=0; /* For valgrind */ +#endif + s_temp->key=key; + s_temp->prev_key=org_key; + if (prev_key) /* If not first key in block */ + { + /* pack key against previous key */ + /* + As keys may be identical when running a sort in mariachk, we + have to guard against the case where keys may be identical + */ + uchar *end; + end=key+key_length; + for ( ; *key == *prev_key && key < end; key++,prev_key++) ; + s_temp->ref_length= ref_length=(uint) (key-s_temp->key); + length=key_length - ref_length + get_pack_length(ref_length); + } + else + { + /* No previous key */ + s_temp->ref_length=ref_length=0; + length=key_length+1; + } + if ((s_temp->next_key_pos=next_key)) /* If another key after */ + { + /* pack key against next key */ + uint next_length,next_length_pack; + get_key_pack_length(next_length,next_length_pack,next_key); + + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key && next_length) + { + uchar *end; + for (key= s_temp->key, end=key+next_length ; + *key == *org_key && key < end; + key++,org_key++) ; + ref_length= (uint) (key - s_temp->key); + } + + if (next_length > ref_length) + { + /* We put a key with different case between two keys with the same prefix + Extend next key to have same prefix as + this key */ + s_temp->n_ref_length= ref_length; + s_temp->prev_length= next_length-ref_length; + s_temp->prev_key+= ref_length; + return (int) (length+ s_temp->prev_length - next_length_pack + + get_pack_length(ref_length)); + } + /* Check how many characters are identical to next key */ + key= s_temp->key+next_length; + while (*key++ == *next_key++) ; + if ((ref_length= (uint) (key - s_temp->key)-1) == next_length) + { + s_temp->next_key_pos=0; + return length; /* can't pack next key */ + } + s_temp->prev_length=0; + s_temp->n_ref_length=ref_length; + return (int) (length-(ref_length - next_length) - next_length_pack + + get_pack_length(ref_length)); + } + return (int) length; +} + + +/* +** store a key packed with _ma_calc_xxx_key_length in page-buffert +*/ + +/* store key without compression */ + +void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register uchar *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + memcpy((byte*) key_pos,(byte*) s_temp->key,(size_t) s_temp->totlength); +} + + +/* store variable length key with prefix compression */ + +#define store_pack_length(test,pos,length) { \ + if (test) { *((pos)++) = (uchar) (length); } else \ + { *((pos)++) = (uchar) ((length) >> 8); *((pos)++) = (uchar) (length); } } + + +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register uchar *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + uint length; + uchar *start; + + start=key_pos; + + if (s_temp->ref_length) + { + /* Packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->ref_length); + /* If not same key after */ + if (s_temp->ref_length != s_temp->pack_marker) + store_key_length_inc(key_pos,s_temp->key_length); + } + else + { + /* Not packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length); + } + bmove((byte*) key_pos,(byte*) s_temp->key, + (length=s_temp->totlength-(uint) (key_pos-start))); + + if (!s_temp->next_key_pos) /* No following key */ + return; + key_pos+=length; + + if (s_temp->prev_length) + { + /* Extend next key because new key didn't have same prefix as prev key */ + if (s_temp->part_of_prev_key) + { + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->part_of_prev_key); + store_key_length_inc(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->n_length); + } + memcpy(key_pos, s_temp->prev_key, s_temp->prev_length); + } + else if (s_temp->n_ref_length) + { + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length); + if (s_temp->n_ref_length == s_temp->pack_marker) + return; /* Identical key */ + store_key_length(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length); + } +} + + +/* variable length key with prefix compression */ + +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register uchar *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + store_key_length_inc(key_pos,s_temp->ref_length); + memcpy((char*) key_pos,(char*) s_temp->key+s_temp->ref_length, + (size_t) s_temp->totlength-s_temp->ref_length); + + if (s_temp->next_key_pos) + { + key_pos+=(uint) (s_temp->totlength-s_temp->ref_length); + store_key_length_inc(key_pos,s_temp->n_ref_length); + if (s_temp->prev_length) /* If we must extend key */ + { + memcpy(key_pos,s_temp->prev_key,s_temp->prev_length); + } + } +} diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c new file mode 100644 index 00000000000..df547792ffa --- /dev/null +++ b/storage/maria/ma_sort.c @@ -0,0 +1,1021 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Creates a index for a database by reading keys, sorting them and outputing + them in sorted order through MARIA_SORT_INFO functions. +*/ + +#include "ma_fulltext.h" +#include + +/* static variables */ + +#undef MIN_SORT_MEMORY +#undef MYF_RW +#undef DISK_BUFFER_SIZE + +#define MERGEBUFF 15 +#define MERGEBUFF2 31 +#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD) +#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) +#define DISK_BUFFER_SIZE (IO_SIZE*16) + + +/* + Pointers of functions for store and read keys from temp file +*/ + +extern void print_error _VARARGS((const char *fmt,...)); + +/* Functions defined in this file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info,uint keys, + uchar **sort_keys, + DYNAMIC_ARRAY *buffpek,int *maxbuffer, + IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions); +static int NEAR_F write_keys(MARIA_SORT_PARAM *info,uchar **sort_keys, + uint count, BUFFPEK *buffpek,IO_CACHE *tempfile); +static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, + IO_CACHE *tempfile); +static int NEAR_F write_index(MARIA_SORT_PARAM *info,uchar * *sort_keys, + uint count); +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info,uint keys, + uchar * *sort_keys, + BUFFPEK *buffpek,int *maxbuffer, + IO_CACHE *t_file); +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info,uint keys, + IO_CACHE *from_file, IO_CACHE *to_file, + uchar * *sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb); +static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint,uchar **,BUFFPEK *, int, + IO_CACHE *); +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info); + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info,uchar **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile); +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info, IO_CACHE *to_file, + char *key, uint sort_length, uint count); +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file, + char* key, uint sort_length, + uint count); +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs); + +/* + Creates a index of sorted keys + + SYNOPSIS + _ma_create_index_by_sort() + info Sort parameters + no_messages Set to 1 if no output + sortbuff_size Size if sortbuffer to allocate + + RESULT + 0 ok + <> 0 Error +*/ + +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, + ulong sortbuff_size) +{ + int error,maxbuffer,skr; + uint memavl,old_memavl,keys,sort_length; + DYNAMIC_ARRAY buffpek; + ha_rows records; + uchar **sort_keys; + IO_CACHE tempfile, tempfile_for_exceptions; + DBUG_ENTER("_ma_create_index_by_sort"); + DBUG_PRINT("enter",("sort_length: %d", info->key_length)); + + if (info->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + info->write_keys=write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys=write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + + my_b_clear(&tempfile); + my_b_clear(&tempfile_for_exceptions); + bzero((char*) &buffpek,sizeof(buffpek)); + sort_keys= (uchar **) NULL; error= 1; + maxbuffer=1; + + memavl=max(sortbuff_size,MIN_SORT_MEMORY); + records= info->sort_info->max_records; + sort_length= info->key_length; + LINT_INIT(keys); + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (records+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= records+1; + else + do + { + skr=maxbuffer; + if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/ + (sort_length+sizeof(char*))) <= 1) + { + _ma_check_print_error(info->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (records/(keys-1)+1)) != skr); + + if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ + HA_FT_MAXBYTELEN, MYF(0)))) + { + if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer, + maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= 0; + } + else + break; + } + old_memavl=memavl; + if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY) + memavl=MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + goto err; /* purecov: tested */ + } + (*info->lock_in_memory)(info->sort_info->param);/* Everything is allocated */ + + if (!no_messages) + printf(" - Searching for keys, allocating buffer for %d keys\n",keys); + + if ((records=find_all_keys(info,keys,sort_keys,&buffpek,&maxbuffer, + &tempfile,&tempfile_for_exceptions)) + == HA_POS_ERROR) + goto err; /* purecov: tested */ + if (maxbuffer == 0) + { + if (!no_messages) + printf(" - Dumping %lu keys\n", (ulong) records); + if (write_index(info,sort_keys, (uint) records)) + goto err; /* purecov: inspected */ + } + else + { + keys=(keys*(sort_length+sizeof(char*)))/sort_length; + if (maxbuffer >= MERGEBUFF2) + { + if (!no_messages) + printf(" - Merging %lu keys\n", (ulong) records); /* purecov: tested */ + if (merge_many_buff(info,keys,sort_keys, + dynamic_element(&buffpek,0,BUFFPEK *),&maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + if (flush_io_cache(&tempfile) || + reinit_io_cache(&tempfile,READ_CACHE,0L,0,0)) + goto err; /* purecov: inspected */ + if (!no_messages) + printf(" - Last merge and dumping keys\n"); /* purecov: tested */ + if (merge_index(info,keys,sort_keys,dynamic_element(&buffpek,0,BUFFPEK *), + maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + + if (flush_maria_ft_buf(info) || _ma_flush_pending_blocks(info)) + goto err; + + if (my_b_inited(&tempfile_for_exceptions)) + { + MARIA_HA *index=info->sort_info->info; + uint keyno=info->key; + uint key_length, ref_length=index->s->rec_reflength; + + if (!no_messages) + printf(" - Adding exceptions\n"); /* purecov: tested */ + if (flush_io_cache(&tempfile_for_exceptions) || + reinit_io_cache(&tempfile_for_exceptions,READ_CACHE,0L,0,0)) + goto err; + + while (!my_b_read(&tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length)) + && !my_b_read(&tempfile_for_exceptions,(byte*)sort_keys, + (uint) key_length)) + { + if (_ma_ck_write(index,keyno,(uchar*) sort_keys,key_length-ref_length)) + goto err; + } + } + + error =0; + +err: + if (sort_keys) + my_free((gptr) sort_keys,MYF(0)); + delete_dynamic(&buffpek); + close_cached_file(&tempfile); + close_cached_file(&tempfile_for_exceptions); + + DBUG_RETURN(error ? -1 : 0); +} /* _ma_create_index_by_sort */ + + +/* Search after all keys and place them in a temp. file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, + uchar **sort_keys, DYNAMIC_ARRAY *buffpek, + int *maxbuffer, IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions) +{ + int error; + uint idx; + DBUG_ENTER("find_all_keys"); + + idx=error=0; + sort_keys[0]=(uchar*) (sort_keys+keys); + + while (!(error=(*info->key_read)(info,sort_keys[idx]))) + { + if (info->real_key_length > info->key_length) + { + if (write_key(info,sort_keys[idx],tempfile_for_exceptions)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + continue; + } + + if (++idx == keys) + { + if (info->write_keys(info,sort_keys,idx-1,(BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + + sort_keys[0]=(uchar*) (sort_keys+keys); + memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + idx=1; + } + sort_keys[idx]=sort_keys[idx-1]+info->key_length; + } + if (error > 0) + DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */ + if (buffpek->elements) + { + if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + *maxbuffer=buffpek->elements-1; + } + else + *maxbuffer=0; + + DBUG_RETURN((*maxbuffer)*(keys-1)+idx); +} /* find_all_keys */ + + +#ifdef THREAD +/* Search after all keys and place them in a temp. file */ + +pthread_handler_t _ma_thr_find_all_keys(void *arg) +{ + MARIA_SORT_PARAM *info= (MARIA_SORT_PARAM*) arg; + int error; + uint memavl,old_memavl,keys,sort_length; + uint idx, maxbuffer; + uchar **sort_keys=0; + + LINT_INIT(keys); + + error=1; + + if (my_thread_init()) + goto err; + if (info->sort_info->got_error) + goto err; + + if (info->keyinfo->flag && HA_VAR_LENGTH_KEY) + { + info->write_keys=write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys=write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + + my_b_clear(&info->tempfile); + my_b_clear(&info->tempfile_for_exceptions); + bzero((char*) &info->buffpek,sizeof(info->buffpek)); + bzero((char*) &info->unique, sizeof(info->unique)); + sort_keys= (uchar **) NULL; + + memavl=max(info->sortbuff_size, MIN_SORT_MEMORY); + idx= info->sort_info->max_records; + sort_length= info->key_length; + maxbuffer= 1; + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= idx+1; + else + { + uint skr; + do + { + skr=maxbuffer; + if (memavl < sizeof(BUFFPEK)*maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ + (sort_length+sizeof(char*))) <= 1) + { + _ma_check_print_error(info->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); + } + if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ + ((info->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) + { + if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK), + maxbuffer, maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= (uchar **) NULL; /* for err: label */ + } + else + break; + } + old_memavl=memavl; + if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY) + memavl=MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + goto err; /* purecov: tested */ + } + + if (info->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n",info->key+1,keys); + info->sort_keys=sort_keys; + + idx=error=0; + sort_keys[0]=(uchar*) (sort_keys+keys); + + while (!(error=info->sort_info->got_error) && + !(error=(*info->key_read)(info,sort_keys[idx]))) + { + if (info->real_key_length > info->key_length) + { + if (write_key(info,sort_keys[idx], &info->tempfile_for_exceptions)) + goto err; + continue; + } + + if (++idx == keys) + { + if (info->write_keys(info,sort_keys,idx-1, + (BUFFPEK *)alloc_dynamic(&info->buffpek), + &info->tempfile)) + goto err; + sort_keys[0]=(uchar*) (sort_keys+keys); + memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + idx=1; + } + sort_keys[idx]=sort_keys[idx-1]+info->key_length; + } + if (error > 0) + goto err; + if (info->buffpek.elements) + { + if (info->write_keys(info,sort_keys, idx, + (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) + goto err; + info->keys=(info->buffpek.elements-1)*(keys-1)+idx; + } + else + info->keys=idx; + + info->sort_keys_length=keys; + goto ok; + +err: + info->sort_info->got_error=1; /* no need to protect this with a mutex */ + if (sort_keys) + my_free((gptr) sort_keys,MYF(0)); + info->sort_keys=0; + delete_dynamic(& info->buffpek); + close_cached_file(&info->tempfile); + close_cached_file(&info->tempfile_for_exceptions); + +ok: + remove_io_thread(&info->read_cache); + pthread_mutex_lock(&info->sort_info->mutex); + info->sort_info->threads_running--; + pthread_cond_signal(&info->sort_info->cond); + pthread_mutex_unlock(&info->sort_info->mutex); + my_thread_end(); + return NULL; +} + + +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + ulong length, keys; + ulong *rec_per_key_part=param->rec_per_key_part; + int got_error=sort_info->got_error; + uint i; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + MARIA_SORT_PARAM *sinfo; + byte *mergebuf=0; + LINT_INIT(length); + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++) + { + if (!sinfo->sort_keys) + { + got_error=1; + continue; + } + if (!got_error) + { + maria_set_key_active(share->state.key_map, sinfo->key); + if (param->testflag & T_STATISTICS) + maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sinfo->notnull: NULL, + (ulonglong) info->state->records); + + + if (!sinfo->buffpek.elements) + { + if (param->testflag & T_VERBOSE) + { + printf("Key %d - Dumping %u keys\n",sinfo->key+1, sinfo->keys); + fflush(stdout); + } + if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) || + flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo)) + got_error=1; + } + } + my_free((gptr) sinfo->sort_keys,MYF(0)); + my_free(_ma_get_rec_buff_ptr(info, sinfo->rec_buff), + MYF(MY_ALLOW_ZERO_PTR)); + sinfo->sort_keys=0; + } + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, + delete_dynamic(&sinfo->buffpek), + close_cached_file(&sinfo->tempfile), + close_cached_file(&sinfo->tempfile_for_exceptions), + sinfo++) + { + if (got_error) + continue; + if (sinfo->keyinfo->flag && HA_VAR_LENGTH_KEY) + { + sinfo->write_keys=write_keys_varlen; + sinfo->read_to_buffer=read_to_buffer_varlen; + sinfo->write_key=write_merge_key_varlen; + } + else + { + sinfo->write_keys=write_keys; + sinfo->read_to_buffer=read_to_buffer; + sinfo->write_key=write_merge_key; + } + if (sinfo->buffpek.elements) + { + uint maxbuffer=sinfo->buffpek.elements-1; + if (!mergebuf) + { + length=param->sort_buffer_length; + while (length >= MIN_SORT_MEMORY && !mergebuf) + { + mergebuf=my_malloc(length, MYF(0)); + length=length*3/4; + } + if (!mergebuf) + { + got_error=1; + continue; + } + } + keys=length/sinfo->key_length; + if (maxbuffer >= MERGEBUFF2) + { + if (param->testflag & T_VERBOSE) + printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys); + if (merge_many_buff(sinfo, keys, (uchar **)mergebuf, + dynamic_element(&sinfo->buffpek, 0, BUFFPEK *), + (int*) &maxbuffer, &sinfo->tempfile)) + { + got_error=1; + continue; + } + } + if (flush_io_cache(&sinfo->tempfile) || + reinit_io_cache(&sinfo->tempfile,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + if (param->testflag & T_VERBOSE) + printf("Key %d - Last merge and dumping keys\n", sinfo->key+1); + if (merge_index(sinfo, keys, (uchar **)mergebuf, + dynamic_element(&sinfo->buffpek,0,BUFFPEK *), + maxbuffer,&sinfo->tempfile) || + flush_maria_ft_buf(sinfo) || + _ma_flush_pending_blocks(sinfo)) + { + got_error=1; + continue; + } + } + if (my_b_inited(&sinfo->tempfile_for_exceptions)) + { + uint key_length; + + if (param->testflag & T_VERBOSE) + printf("Key %d - Dumping 'long' keys\n", sinfo->key+1); + + if (flush_io_cache(&sinfo->tempfile_for_exceptions) || + reinit_io_cache(&sinfo->tempfile_for_exceptions,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + + while (!got_error && + !my_b_read(&sinfo->tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length))) + { + byte maria_ft_buf[HA_FT_MAXBYTELEN + HA_FT_WLEN + 10]; + if (key_length > sizeof(maria_ft_buf) || + my_b_read(&sinfo->tempfile_for_exceptions, (byte*)maria_ft_buf, + (uint)key_length) || + _ma_ck_write(info, sinfo->key, (uchar*)maria_ft_buf, + key_length - info->s->rec_reflength)) + got_error=1; + } + } + } + my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); + return got_error; +} +#endif /* THREAD */ + + /* Write all keys in memory to file for later merge */ + +static int NEAR_F write_keys(MARIA_SORT_PARAM *info, register uchar **sort_keys, + uint count, BUFFPEK *buffpek, IO_CACHE *tempfile) +{ + uchar **end; + uint sort_length=info->key_length; + DBUG_ENTER("write_keys"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if (my_b_write(tempfile,(byte*) *sort_keys,(uint) sort_length)) + DBUG_RETURN(1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_keys */ + + +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) +{ + int err; + uint16 len = _ma_keylength(info->keyinfo, (uchar*) bufs); + + /* The following is safe as this is a local file */ + if ((err= my_b_write(to_file, (byte*)&len, sizeof(len)))) + return (err); + if ((err= my_b_write(to_file,bufs, (uint) len))) + return (err); + return (0); +} + + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, + register uchar **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile) +{ + uchar **end; + int err; + DBUG_ENTER("write_keys_varlen"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if ((err= my_var_write(info,tempfile, (byte*) *sort_keys))) + DBUG_RETURN(err); + } + DBUG_RETURN(0); +} /* write_keys_varlen */ + + +static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, + IO_CACHE *tempfile) +{ + uint key_length=info->real_key_length; + DBUG_ENTER("write_key"); + + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); + + if (my_b_write(tempfile,(byte*)&key_length,sizeof(key_length)) || + my_b_write(tempfile,(byte*)key,(uint) key_length)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} /* write_key */ + + +/* Write index */ + +static int NEAR_F write_index(MARIA_SORT_PARAM *info, register uchar **sort_keys, + register uint count) +{ + DBUG_ENTER("write_index"); + + qsort2((gptr) sort_keys,(size_t) count,sizeof(byte*), + (qsort2_cmp) info->key_cmp,info); + while (count--) + { + if ((*info->key_write)(info,*sort_keys++)) + DBUG_RETURN(-1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_index */ + + + /* Merge buffers to make < MERGEBUFF2 buffers */ + +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info, uint keys, + uchar **sort_keys, BUFFPEK *buffpek, + int *maxbuffer, IO_CACHE *t_file) +{ + register int i; + IO_CACHE t_file2, *from_file, *to_file, *temp; + BUFFPEK *lastbuff; + DBUG_ENTER("merge_many_buff"); + + if (*maxbuffer < MERGEBUFF2) + DBUG_RETURN(0); /* purecov: inspected */ + if (flush_io_cache(t_file) || + open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + from_file= t_file ; to_file= &t_file2; + while (*maxbuffer >= MERGEBUFF2) + { + reinit_io_cache(from_file,READ_CACHE,0L,0,0); + reinit_io_cache(to_file,WRITE_CACHE,0L,0,0); + lastbuff=buffpek; + for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF) + { + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+i+MERGEBUFF-1)) + break; /* purecov: inspected */ + } + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+ *maxbuffer)) + break; /* purecov: inspected */ + if (flush_io_cache(to_file)) + break; /* purecov: inspected */ + temp=from_file; from_file=to_file; to_file=temp; + *maxbuffer= (int) (lastbuff-buffpek)-1; + } + close_cached_file(to_file); /* This holds old result */ + if (to_file == t_file) + *t_file=t_file2; /* Copy result file */ + + DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */ +} /* merge_many_buff */ + + +/* + Read data to buffer + + SYNOPSIS + read_to_buffer() + fromfile File to read from + buffpek Where to read from + sort_length max length to read + RESULT + > 0 Ammount of bytes read + -1 Error +*/ + +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint length; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + if (my_pread(fromfile->file,(byte*) buffpek->base, + (length= sort_length*count),buffpek->file_pos,MYF_RW)) + return((uint) -1); /* purecov: inspected */ + buffpek->key=buffpek->base; + buffpek->file_pos+= length; /* New filepos */ + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer */ + +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint16 length_of_key = 0; + uint idx; + uchar *buffp; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + buffp = buffpek->base; + + for (idx=1;idx<=count;idx++) + { + if (my_pread(fromfile->file,(byte*)&length_of_key,sizeof(length_of_key), + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=sizeof(length_of_key); + if (my_pread(fromfile->file,(byte*) buffp,length_of_key, + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=length_of_key; + buffp = buffp + sort_length; + } + buffpek->key=buffpek->base; + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer_varlen */ + + +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file,char* key, + uint sort_length, uint count) +{ + uint idx; + + char *bufs = key; + for (idx=1;idx<=count;idx++) + { + int err; + if ((err= my_var_write(info,to_file, (byte*) bufs))) + return (err); + bufs=bufs+sort_length; + } + return(0); +} + + +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)), + IO_CACHE *to_file, char* key, + uint sort_length, uint count) +{ + return my_b_write(to_file,(byte*) key,(uint) sort_length*count); +} + +/* + Merge buffers to one buffer + If to_file == 0 then use info->key_write +*/ + +static int NEAR_F +merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, + IO_CACHE *to_file, uchar **sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb) +{ + int error; + uint sort_length,maxcount; + ha_rows count; + my_off_t to_start_filepos; + uchar *strpos; + BUFFPEK *buffpek,**refpek; + QUEUE queue; + volatile int *killed= _ma_killed_ptr(info->sort_info->param); + + DBUG_ENTER("merge_buffers"); + + count=error=0; + maxcount=keys/((uint) (Tb-Fb) +1); + LINT_INIT(to_start_filepos); + if (to_file) + to_start_filepos=my_b_tell(to_file); + strpos=(uchar*) sort_keys; + sort_length=info->key_length; + + if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0, + (int (*)(void*, byte *,byte*)) info->key_cmp, + (void*) info)) + DBUG_RETURN(1); /* purecov: inspected */ + + for (buffpek= Fb ; buffpek <= Tb ; buffpek++) + { + count+= buffpek->count; + buffpek->base= strpos; + buffpek->max_keys=maxcount; + strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, + sort_length)); + if (error == -1) + goto err; /* purecov: inspected */ + queue_insert(&queue,(char*) buffpek); + } + + while (queue.elements > 1) + { + for (;;) + { + if (*killed) + { + error=1; goto err; + } + buffpek=(BUFFPEK*) queue_top(&queue); + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + (uint) sort_length,1)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + if ((*info->key_write)(info,(void*) buffpek->key)) + { + error=1; goto err; /* purecov: inspected */ + } + } + buffpek->key+=sort_length; + if (! --buffpek->mem_count) + { + if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) + { + uchar *base=buffpek->base; + uint max_keys=buffpek->max_keys; + + VOID(queue_remove(&queue,0)); + + /* Put room used by buffer to use in other buffer */ + for (refpek= (BUFFPEK**) &queue_top(&queue); + refpek <= (BUFFPEK**) &queue_end(&queue); + refpek++) + { + buffpek= *refpek; + if (buffpek->base+buffpek->max_keys*sort_length == base) + { + buffpek->max_keys+=max_keys; + break; + } + else if (base+max_keys*sort_length == buffpek->base) + { + buffpek->base=base; + buffpek->max_keys+=max_keys; + break; + } + } + break; /* One buffer have been removed */ + } + } + else if (error == -1) + goto err; /* purecov: inspected */ + queue_replaced(&queue); /* Top element has been replaced */ + } + } + buffpek=(BUFFPEK*) queue_top(&queue); + buffpek->base=(uchar *) sort_keys; + buffpek->max_keys=keys; + do + { + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + sort_length,buffpek->mem_count)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + register uchar *end; + strpos= buffpek->key; + for (end=strpos+buffpek->mem_count*sort_length; + strpos != end ; + strpos+=sort_length) + { + if ((*info->key_write)(info,(void*) strpos)) + { + error=1; goto err; /* purecov: inspected */ + } + } + } + } + while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != -1 && + error != 0); + + lastbuff->count=count; + if (to_file) + lastbuff->file_pos=to_start_filepos; +err: + delete_queue(&queue); + DBUG_RETURN(error); +} /* merge_buffers */ + + + /* Do a merge to output-file (save only positions) */ + +static int NEAR_F +merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys, + BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile) +{ + DBUG_ENTER("merge_index"); + if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek, + buffpek+maxbuffer)) + DBUG_RETURN(1); /* purecov: inspected */ + DBUG_RETURN(0); +} /* merge_index */ + +static int +flush_maria_ft_buf(MARIA_SORT_PARAM *info) +{ + int err=0; + if (info->sort_info->ft_buf) + { + err=_ma_sort_ft_buf_flush(info); + my_free((gptr)info->sort_info->ft_buf, MYF(0)); + info->sort_info->ft_buf=0; + } + return err; +} + diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h new file mode 100644 index 00000000000..a7e282f0ddc --- /dev/null +++ b/storage/maria/ma_sp_defs.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_DEFS_H +#define _SP_DEFS_H + +#define SPDIMS 2 +#define SPTYPE HA_KEYTYPE_DOUBLE +#define SPLEN 8 + +#ifdef HAVE_SPATIAL + +enum wkbType +{ + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7 +}; + +enum wkbByteOrder +{ + wkbXDR = 0, /* Big Endian */ + wkbNDR = 1 /* Little Endian */ +}; + +uint sp_make_key(register MARIA_HA *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos); + +#endif /*HAVE_SPATIAL*/ +#endif /* _SP_DEFS_H */ diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c new file mode 100644 index 00000000000..b9841fed1e7 --- /dev/null +++ b/storage/maria/ma_sp_key.c @@ -0,0 +1,300 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_SPATIAL + +#include "ma_sp_defs.h" + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top); +static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr); + +static void get_double(double *d, const byte *pos) +{ + float8get(*d, pos); +} + +uint sp_make_key(register MARIA_HA *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos) +{ + HA_KEYSEG *keyseg; + MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr]; + uint len = 0; + byte *pos; + uint dlen; + uchar *dptr; + double mbr[SPDIMS * 2]; + uint i; + + keyseg = &keyinfo->seg[-1]; + pos = (byte*)record + keyseg->start; + + dlen = _ma_calc_blob_length(keyseg->bit_start, pos); + memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*)); + if (!dptr) + { + my_errno= HA_ERR_NULL_IN_SPATIAL; + return 0; + } + sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */ + + for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++) + { + uint length = keyseg->length; + + pos = ((byte*)mbr) + keyseg->start; + if (keyseg->flag & HA_SWAP_KEY) + { +#ifdef HAVE_ISNAN + if (keyseg->type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr, pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key, length); + key+= length; + continue; + } + } + else if (keyseg->type == HA_KEYTYPE_DOUBLE) + { + double nr; + get_double(&nr, pos); + if (isnan(nr)) + { + bzero(key, length); + key+= length; + continue; + } + } +#endif + pos += length; + while (length--) + { + *key++ = *--pos; + } + } + else + { + memcpy((byte*)key, pos, length); + key += keyseg->length; + } + len += keyseg->length; + } + _ma_dpointer(info, key, filepos); + return len; +} + +/* +Calculate minimal bounding rectangle (mbr) of the spatial object +stored in "well-known binary representation" (wkb) format. +*/ +static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr) +{ + uint i; + + for (i=0; i < n_dims; ++i) + { + mbr[i * 2] = DBL_MAX; + mbr[i * 2 + 1] = -DBL_MAX; + } + + return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1); +} + +/* + Add one point stored in wkb to mbr +*/ + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order __attribute__((unused)), + double *mbr) +{ + double ord; + double *mbr_end= mbr + n_dims * 2; + + while (mbr < mbr_end) + { + if ((*wkb) > end - 8) + return -1; + get_double(&ord, (const byte*) *wkb); + (*wkb)+= 8; + if (ord < *mbr) + float8store((char*) mbr, ord); + mbr++; + if (ord > *mbr) + float8store((char*) mbr, ord); + mbr++; + } + return 0; +} + + +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr); +} + + +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_points; + + n_points = uint4korr(*wkb); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + return 0; +} + + +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_linear_rings; + uint n_points; + + n_linear_rings = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_linear_rings > 0; --n_linear_rings) + { + n_points = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + } + return 0; +} + +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top) +{ + int res; + uchar byte_order; + uint wkb_type; + + byte_order = *(*wkb); + ++(*wkb); + + wkb_type = uint4korr((*wkb)); + (*wkb) += 4; + + switch ((enum wkbType) wkb_type) + { + case wkbPoint: + res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbLineString: + res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbPolygon: + res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbMultiPoint: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiLineString: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiPolygon: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbGeometryCollection: + { + uint n_items; + + if (!top) + return -1; + + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0)) + return -1; + } + res = 0; + break; + } + default: + res = -1; + } + return res; +} + +#endif /*HAVE_SPATIAL*/ diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c new file mode 100644 index 00000000000..ea812974c8c --- /dev/null +++ b/storage/maria/ma_sp_test.c @@ -0,0 +1,568 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA spatial table */ +/* Written by Alex Barkov, who has a shared copyright to this code */ + +#include "maria.h" + +#ifdef HAVE_SPATIAL +#include "ma_sp_defs.h" + +#define MAX_REC_LENGTH 1024 +#define KEYALG HA_KEY_ALG_RTREE + +static void create_linestring(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); + +static void create_key(char *key,uint rownr); +static void print_key(const char *key,const char * tail); + +static int run_test(const char *filename); +static int read_with_pos(MARIA_HA * file, int silent); + +static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, + uchar *wkb); +static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims); + +static char blob_key[MAX_REC_LENGTH]; + + +int main(int argc __attribute__((unused)),char *argv[]) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("sp_test")); +} + + +int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range min_range, max_range; + int silent=0; + int create_flag=0; + int null_fields=0; + int nrecords=30; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char key[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd=10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + + + /* Define spatial column */ + + recinfo[1].type=FIELD_BLOB; + recinfo[1].length=4 + maria_portable_sizeof_char_ptr; + + + + /* Define a key with 1 spatial segment */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].flag=HA_SPATIAL; + keyinfo[0].key_alg=KEYALG; + + keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY; + keyinfo[0].seg[0].flag=0; + keyinfo[0].seg[0].start= 1; + keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */ + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language=default_charset_info->number; + keyinfo[0].seg[0].bit_start=4; /* Long BLOB */ + + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + 1, /* keys */ + keyinfo, + 2, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_key(key, nrecords*4/5); + print_key(key," search for INTERSECT\n"); + + if ((error=maria_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for(i=0;i "); + printf(" offs=%ld ",(long int)offs); + printf("%s",tail); +} + + +#ifdef NOT_USED +static void create_point(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i; + + for(i=0;i= , <= , > , < +*/ + +uint NEAR maria_read_vec[]= +{ + SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER, + SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER, + SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER, + MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL +}; + +uint NEAR maria_readnext_vec[]= +{ + SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER, + SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER +}; diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c new file mode 100644 index 00000000000..c9614de1c72 --- /dev/null +++ b/storage/maria/ma_statrec.c @@ -0,0 +1,301 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to handle fixed-length-records */ + +#include "maria_def.h" + + +int _ma_write_static_record(MARIA_HA *info, const byte *record) +{ + uchar temp[8]; /* max pointer length */ + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + my_off_t filepos=info->s->state.dellink; + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info,(char*) &temp[0],info->s->base.rec_reflength, + info->s->state.dellink+1, + MYF(MY_NABP))) + goto err; + info->s->state.dellink= _ma_rec_pos(info->s,temp); + info->state->del--; + info->state->empty-=info->s->base.pack_reclength; + if (info->s->file_write(info, (char*) record, info->s->base.reclength, + filepos, + MYF(MY_NABP))) + goto err; + } + else + { + if (info->state->data_file_length > info->s->base.max_data_file_length- + info->s->base.pack_reclength) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + return(2); + } + if (info->opt_flag & WRITE_CACHE_USED) + { /* Cash in use */ + if (my_b_write(&info->rec_cache, (byte*) record, + info->s->base.reclength)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (my_b_write(&info->rec_cache, (byte*) temp,length)) + goto err; + } + } + else + { + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_write(info,(char*) record,info->s->base.reclength, + info->state->data_file_length, + info->s->write_flag)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (info->s->file_write(info, (byte*) temp,length, + info->state->data_file_length+ + info->s->base.reclength, + info->s->write_flag)) + goto err; + } + } + info->state->data_file_length+=info->s->base.pack_reclength; + info->s->state.split++; + } + return 0; + err: + return 1; +} + +int _ma_update_static_record(MARIA_HA *info, my_off_t pos, const byte *record) +{ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + return (info->s->file_write(info, + (char*) record,info->s->base.reclength, + pos, + MYF(MY_NABP)) != 0); +} + + +int _ma_delete_static_record(MARIA_HA *info) +{ + uchar temp[9]; /* 1+sizeof(uint32) */ + + info->state->del++; + info->state->empty+=info->s->base.pack_reclength; + temp[0]= '\0'; /* Mark that record is deleted */ + _ma_dpointer(info,temp+1,info->s->state.dellink); + info->s->state.dellink = info->lastpos; + info->rec_cache.seek_not_done=1; + return (info->s->file_write(info,(byte*) temp, 1+info->s->rec_reflength, + info->lastpos, MYF(MY_NABP)) != 0); +} + + +int _ma_cmp_static_record(register MARIA_HA *info, register const byte *old) +{ + DBUG_ENTER("_ma_cmp_static_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + { + DBUG_RETURN(-1); + } + info->rec_cache.seek_not_done=1; /* We have done a seek */ + } + + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, + info->lastpos, + MYF(MY_NABP))) + DBUG_RETURN(-1); + if (memcmp((byte*) info->rec_buff, (byte*) old, + (uint) info->s->base.reclength)) + { + DBUG_DUMP("read",old,info->s->base.reclength); + DBUG_DUMP("disk",info->rec_buff,info->s->base.reclength); + my_errno=HA_ERR_RECORD_CHANGED; /* Record have changed */ + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +int _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, my_off_t pos) +{ + DBUG_ENTER("_ma_cmp_static_unique"); + + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, + pos, MYF(MY_NABP))) + DBUG_RETURN(-1); + DBUG_RETURN(_ma_unique_comp(def, record, info->rec_buff, + def->null_are_equal)); +} + + + /* Read a fixed-length-record */ + /* Returns 0 if Ok. */ + /* 1 if record is deleted */ + /* MY_FILE_ERROR on read-error or locking-error */ + +int _ma_read_static_record(register MARIA_HA *info, register my_off_t pos, + register byte *record) +{ + int error; + + if (pos != HA_OFFSET_ERROR) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file <= pos && + flush_io_cache(&info->rec_cache)) + return(-1); + info->rec_cache.seek_not_done=1; /* We have done a seek */ + + error=info->s->file_read(info,(char*) record,info->s->base.reclength, + pos,MYF(MY_NABP)) != 0; + fast_ma_writeinfo(info); + if (! error) + { + if (!*record) + { + my_errno=HA_ERR_RECORD_DELETED; + return(1); /* Record is deleted */ + } + info->update|= HA_STATE_AKTIV; /* Record is read */ + return(0); + } + return(-1); /* Error on read */ + } + fast_ma_writeinfo(info); /* No such record */ + return(-1); +} + + + +int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, + register my_off_t filepos, + my_bool skip_deleted_blocks) +{ + int locked,error,cache_read; + uint cache_length; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_static_record"); + + cache_read=0; + cache_length=0; + if (info->opt_flag & WRITE_CACHE_USED && + (info->rec_cache.pos_in_file <= filepos || skip_deleted_blocks) && + flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + if (info->opt_flag & READ_CACHE_USED) + { /* Cache in use */ + if (filepos == my_b_tell(&info->rec_cache) && + (skip_deleted_blocks || !filepos)) + { + cache_read=1; /* Read record using cache */ + cache_length=(uint) (info->rec_cache.read_end - info->rec_cache.read_pos); + } + else + info->rec_cache.seek_not_done=1; /* Filepos is changed */ + } + locked=0; + if (info->lock_type == F_UNLCK) + { + if (filepos >= info->state->data_file_length) + { /* Test if new records */ + if (_ma_readinfo(info,F_RDLCK,0)) + DBUG_RETURN(my_errno); + locked=1; + } + else + { /* We don't nead new info */ +#ifndef UNSAFE_LOCKING + if ((! cache_read || share->base.reclength > cache_length) && + share->tot_locks == 0) + { /* record not in cache */ + if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, + MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) + DBUG_RETURN(my_errno); + locked=1; + } +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + } + if (filepos >= info->state->data_file_length) + { + DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld", + filepos/share->base.reclength,filepos, + info->state->records, info->state->del)); + fast_ma_writeinfo(info); + DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE); + } + info->lastpos= filepos; + info->nextpos= filepos+share->base.pack_reclength; + + if (! cache_read) /* No cacheing */ + { + if ((error= _ma_read_static_record(info,filepos,buf))) + { + if (error > 0) + error=my_errno=HA_ERR_RECORD_DELETED; + else + error=my_errno; + } + DBUG_RETURN(error); + } + + /* Read record with cacheing */ + error=my_b_read(&info->rec_cache,(byte*) buf,share->base.reclength); + if (info->s->base.pack_reclength != info->s->base.reclength && !error) + { + char tmp[8]; /* Skill fill bytes */ + error=my_b_read(&info->rec_cache,(byte*) tmp, + info->s->base.pack_reclength - info->s->base.reclength); + } + if (locked) + VOID(_ma_writeinfo(info,0)); /* Unlock keyfile */ + if (!error) + { + if (!buf[0]) + { /* Record is removed */ + DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); + } + /* Found and may be updated */ + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + DBUG_RETURN(0); + } + /* my_errno should be set if rec_cache.error == -1 */ + if (info->rec_cache.error != -1 || my_errno == 0) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(my_errno); /* Something wrong (EOF?) */ +} diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c new file mode 100644 index 00000000000..621a6d90e65 --- /dev/null +++ b/storage/maria/ma_test1.c @@ -0,0 +1,681 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA table */ + +#include "maria.h" +#include +#include + +#define MAX_REC_LENGTH 1024 + +static void usage(); + +static int rec_pointer_size=0, flags[50]; +static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE; +static int key_type=HA_KEYTYPE_NUM; +static int create_flag=0; + +static uint insert_count, update_count, remove_count; +static uint pack_keys=0, pack_seg=0, key_length; +static uint unique_key=HA_NOSAME; +static my_bool key_cacheing, null_fields, silent, skip_update, opt_unique, + verbose; +static MARIA_COLUMNDEF recinfo[4]; +static MARIA_KEYDEF keyinfo[10]; +static HA_KEYSEG keyseg[10]; +static HA_KEYSEG uniqueseg[10]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_key(char *key,uint rownr); +static void create_record(char *record,uint rownr); +static void update_record(char *record); + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + my_init(); + maria_init(); + if (key_cacheing) + init_key_cache(maria_key_cache,KEY_CACHE_BLOCK_SIZE,IO_SIZE*16,0,0); + get_options(argc,argv); + + exit(run_test("test1")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j,error,deleted,rec_length,uniques=0; + ha_rows found,row_count; + my_off_t pos; + char record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + + bzero((char*) recinfo,sizeof(recinfo)); + + /* First define 2 columns */ + recinfo[0].type=FIELD_NORMAL; recinfo[0].length=1; /* For NULL bits */ + recinfo[1].type=key_field; + recinfo[1].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);; + recinfo[2].type=extra_field; + recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : 24); + if (extra_field == FIELD_VARCHAR) + recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length); + if (opt_unique) + { + recinfo[3].type=FIELD_CHECK; + recinfo[3].length=MARIA_UNIQUE_HASH_LENGTH; + } + rec_length=recinfo[0].length+recinfo[1].length+recinfo[2].length+ + recinfo[3].length; + + if (key_type == HA_KEYTYPE_VARTEXT1 && + key_length > 255) + key_type= HA_KEYTYPE_VARTEXT2; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= pack_seg; + keyinfo[0].seg[0].start=1; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + if (pack_seg & HA_BLOB_PART) + { + keyinfo[0].seg[0].bit_start=4; /* Length of blob length */ + } + keyinfo[0].flag = (uint8) (pack_keys | unique_key); + + bzero((byte*) flags,sizeof(flags)); + if (opt_unique) + { + uint start; + uniques=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + bzero((char*) uniqueseg,sizeof(uniqueseg)); + uniquedef.seg=uniqueseg; + uniquedef.keysegs=2; + + /* Make a unique over all columns (except first NULL fields) */ + for (i=0, start=1 ; i < 2 ; i++) + { + uniqueseg[i].start=start; + start+=recinfo[i+1].length; + uniqueseg[i].length=recinfo[i+1].length; + uniqueseg[i].language= default_charset_info->number; + } + uniqueseg[0].type= key_type; + uniqueseg[0].null_bit= null_fields ? 2 : 0; + uniqueseg[1].type= HA_KEYTYPE_TEXT; + if (extra_field == FIELD_BLOB) + { + uniqueseg[1].length=0; /* The whole blob */ + uniqueseg[1].bit_start=4; /* long blob */ + uniqueseg[1].flag|= HA_BLOB_PART; + } + else if (extra_field == FIELD_VARCHAR) + uniqueseg[1].flag|= HA_VAR_LENGTH_PART; + } + else + uniques=0; + + if (!silent) + printf("- Creating isam-file\n"); + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=(ulong) (rec_pointer_size ? + (1L << (rec_pointer_size*8))/40 : + 0); + if (maria_create(filename,1,keyinfo,3+opt_unique,recinfo, + uniques, &uniquedef, &create_info, + create_flag)) + goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + row_count=deleted=0; + for (i=49 ; i>=1 ; i-=2 ) + { + if (insert_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + j=i%25 +1; + create_record(record,j); + error=maria_write(file,record); + if (!error) + row_count++; + flags[j]=1; + if (verbose || error) + printf("J= %2d maria_write: %d errno: %d\n", j,error,my_errno); + } + + /* Insert 2 rows with null values */ + if (null_fields) + { + create_record(record,0); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + flags[0]=2; + } + + if (!skip_update) + { + if (opt_unique) + { + if (!silent) + printf("- Checking unique constraint\n"); + create_record(record,j); + if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE) + { + printf("unique check failed\n"); + } + } + if (!silent) + printf("- Updating rows\n"); + + /* Update first last row to force extend of file */ + if (maria_rsame(file,read_record,-1)) + { + printf("Can't find last row with maria_rsame\n"); + } + else + { + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update last row: %.*s\n", + keyinfo[0].seg[0].length,read_record+1); + } + } + + /* Read through all rows and update them */ + pos=(my_off_t) 0; + found=0; + while ((error=maria_rrnd(file,read_record,pos)) == 0) + { + if (update_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update row: %.*s, error: %d\n", + keyinfo[0].seg[0].length,record+1,my_errno); + } + found++; + pos=HA_OFFSET_ERROR; + } + if (found != row_count) + printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count); + } + + if (!silent) + printf("- Reopening file\n"); + if (maria_close(file)) goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err; + if (!skip_update) + { + if (!silent) + printf("- Removing keys\n"); + + for (i=0 ; i <= 10 ; i++) + { + /* testing */ + if (remove_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + j=i*2; + if (!flags[j]) + continue; + create_key(key,j); + my_errno=0; + if ((error = maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))) + { + if (verbose || (flags[j] >= 1 || + (error && my_errno != HA_ERR_KEY_NOT_FOUND))) + printf("key: '%.*s' maria_rkey: %3d errno: %3d\n", + (int) key_length,key+test(null_fields),error,my_errno); + } + else + { + error=maria_delete(file,read_record); + if (verbose || error) + printf("key: '%.*s' maria_delete: %3d errno: %3d\n", + (int) key_length, key+test(null_fields), error, my_errno); + if (! error) + { + deleted++; + flags[j]--; + } + } + } + } + if (!silent) + printf("- Reading rows with key\n"); + for (i=0 ; i <= 25 ; i++) + { + create_key(key,i); + my_errno=0; + error=maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT); + if (verbose || + (error == 0 && flags[i] == 0 && unique_key) || + (error && (flags[i] != 0 || my_errno != HA_ERR_KEY_NOT_FOUND))) + { + printf("key: '%.*s' maria_rkey: %3d errno: %3d record: %s\n", + (int) key_length,key+test(null_fields),error,my_errno,record+1); + } + } + + if (!silent) + printf("- Reading rows with position\n"); + for (i=1,found=0 ; i <= 30 ; i++) + { + my_errno=0; + if ((error=maria_rrnd(file,read_record,i == 1 ? 0L : HA_OFFSET_ERROR)) == -1) + { + if (found != row_count-deleted) + printf("Found only %ld of %ld rows\n", (ulong) found, + (ulong) (row_count - deleted)); + break; + } + if (!error) + found++; + if (verbose || (error != 0 && error != HA_ERR_RECORD_DELETED && + error != HA_ERR_END_OF_FILE)) + { + printf("pos: %2d maria_rrnd: %3d errno: %3d record: %s\n", + i-1,error,my_errno,read_record+1); + } + } + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + + +static void create_key_part(char *key,uint rownr) +{ + if (!unique_key) + rownr&=7; /* Some identical keys */ + if (keyinfo[0].seg[0].type == HA_KEYTYPE_NUM) + { + sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr); + } + else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 || + keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2) + { /* Alpha record */ + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + bfill(key+3,keyinfo[0].seg[0].length-4,rownr < 10 ? 'a' : 'b'); + } + } + else + { /* Alpha record */ + if (keyinfo[0].seg[0].flag & HA_SPACE_PACK) + sprintf(key,"%-*d",keyinfo[0].seg[0].length,rownr); + else + { + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + key[1]= (rownr < 10 ? 'a' : 'b'); + } + } + } +} + + +static void create_key(char *key,uint rownr) +{ + if (keyinfo[0].seg[0].null_bit) + { + if (rownr == 0) + { + key[0]=1; /* null key */ + key[1]=0; /* Fore easy print of key */ + return; + } + *key++=0; + } + if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint tmp; + create_key_part(key+2,rownr); + tmp=strlen(key+2); + int2store(key,tmp); + } + else + create_key_part(key,rownr); +} + + +static char blob_key[MAX_REC_LENGTH]; +static char blob_record[MAX_REC_LENGTH+20*20]; + + +static void create_record(char *record,uint rownr) +{ + char *pos; + bzero((char*) record,MAX_REC_LENGTH); + record[0]=1; /* delete marker */ + if (rownr == 0 && keyinfo[0].seg[0].null_bit) + record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */ + + pos=record+1; + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + create_key_part(blob_key,rownr); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[1].length; + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + create_key_part(pos+pack_length,rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + pos+= recinfo[1].length; + } + else + { + create_key_part(pos,rownr); + pos+=recinfo[1].length; + } + if (recinfo[2].type == FIELD_BLOB) + { + uint tmp; + char *ptr;; + sprintf(blob_record,"... row: %d", rownr); + strappend(blob_record,max(MAX_REC_LENGTH-rownr,10),' '); + tmp=strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + } + else if (recinfo[2].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + sprintf(pos+pack_length, "... row: %d", rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + } + else + { + sprintf(pos,"... row: %d", rownr); + strappend(pos,recinfo[2].length,' '); + } +} + +/* change row to test re-packing of rows and reallocation of keys */ + +static void update_record(char *record) +{ + char *pos=record+1; + if (recinfo[1].type == FIELD_BLOB) + { + char *column,*ptr; + int length; + length=uint4korr(pos); /* Long blob */ + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_key,column,length); /* Move old key */ + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */ + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + blob_key, length, blob_key, length); + pos+=recinfo[1].length; + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + default_charset_info->cset->casedn(default_charset_info, + pos + pack_length, length, + pos + pack_length, length); + pos+=recinfo[1].length; + } + else + { + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + pos, keyinfo[0].seg[0].length, + pos, keyinfo[0].seg[0].length); + pos+=recinfo[1].length; + } + + if (recinfo[2].type == FIELD_BLOB) + { + char *column; + int length; + length=uint4korr(pos); + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_record,column,length); + bfill(blob_record+length,20,'.'); /* Make it larger */ + length+=20; + int4store(pos,length); + column=blob_record; + memcpy_fixed(pos+4,&column,sizeof(char*)); + } + else if (recinfo[2].type == FIELD_VARCHAR) + { + /* Second field is longer than 10 characters */ + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + bfill(pos+pack_length+length,recinfo[2].length-length-pack_length,'.'); + length=recinfo[2].length-pack_length; + if (pack_length == 1) + *(uchar*) pos= (uchar) length; + else + int2store(pos,length); + } + else + { + bfill(pos+recinfo[2].length-10,10,'.'); + } +} + + +static struct my_option my_long_options[] = +{ + {"checksum", 'c', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Undocumented", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"delete_rows", 'd', "Undocumented", (gptr*) &remove_count, + (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"insert_rows", 'i', "Undocumented", (gptr*) &insert_count, + (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"key_alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_binary_pack", 'B', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_blob", 'b', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_cache", 'K', "Undocumented", (gptr*) &key_cacheing, + (gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, + 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0}, + {"key_multiple", 'm', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_prefix_pack", 'P', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_space_pack", 'p', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key_varchar", 'w', "Test VARCHAR keys", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"null_fields", 'N', "Define fields with NULL", + (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"row_fixed_size", 'S', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"row_pointer_size", 'R', "Undocumented", (gptr*) &rec_pointer_size, + (gptr*) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Undocumented", + (gptr*) &silent, (gptr*) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip_update", 'U', "Undocumented", (gptr*) &skip_update, + (gptr*) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unique", 'C', "Undocumented", (gptr*) &opt_unique, (gptr*) &opt_unique, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"update_rows", 'u', "Undocumented", (gptr*) &update_count, + (gptr*) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be more verbose", (gptr*) &verbose, (gptr*) &verbose, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'a': + key_type= HA_KEYTYPE_TEXT; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'R': /* Length of record pointer */ + if (rec_pointer_size > 3) + rec_pointer_size=0; + break; + case 'P': + pack_keys= HA_PACK_KEY; /* Use prefix compression */ + break; + case 'B': + pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */ + break; + case 'S': + if (key_field == FIELD_VARCHAR) + { + create_flag=0; /* Static sized varchar */ + } + else if (key_field != FIELD_BLOB) + { + key_field=FIELD_NORMAL; /* static-size record */ + extra_field=FIELD_NORMAL; + } + break; + case 'p': + pack_keys=HA_PACK_KEY; /* Use prefix + space packing */ + pack_seg=HA_SPACE_PACK; + key_type=HA_KEYTYPE_TEXT; + break; + case 'm': + unique_key=0; + break; + case 'b': + key_field=FIELD_BLOB; /* blob key */ + extra_field= FIELD_BLOB; + pack_seg|= HA_BLOB_PART; + key_type= HA_KEYTYPE_VARTEXT1; + break; + case 'k': + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) + { + fprintf(stderr,"Wrong key length\n"); + exit(1); + } + break; + case 'w': + key_field=FIELD_VARCHAR; /* varchar keys */ + extra_field= FIELD_VARCHAR; + key_type= HA_KEYTYPE_VARTEXT1; + pack_seg|= HA_VAR_LENGTH_PART; + create_flag|= HA_PACK_RECORD; + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + break; + case 'V': + printf("test1 Ver 1.2 \n"); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + usage(); + exit(1); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c new file mode 100644 index 00000000000..06cfaf7cc5e --- /dev/null +++ b/storage/maria/ma_test2.c @@ -0,0 +1,1050 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av isam-databas: stor test */ + +#ifndef USE_MY_FUNC /* We want to be able to dbug this !! */ +#define USE_MY_FUNC +#endif +#ifdef DBUG_OFF +#undef DBUG_OFF +#endif +#ifndef SAFEMALLOC +#define SAFEMALLOC +#endif +#include "maria_def.h" +#include + +#define STANDARD_LENGTH 37 +#define MARIA_KEYS 6 +#define MAX_PARTS 4 +#if !defined(MSDOS) && !defined(labs) +#define labs(a) abs(a) +#endif + +static void get_options(int argc, char *argv[]); +static uint rnd(uint max_value); +static void fix_length(byte *record,uint length); +static void put_blob_in_record(char *blob_pos,char **blob_buffer); +static void copy_key(struct st_maria_info *info,uint inx, + uchar *record,uchar *key); + +static int verbose=0,testflag=0, + first_key=0,async_io=0,key_cacheing=0,write_cacheing=0,locking=0, + rec_pointer_size=0,pack_fields=1,use_log=0,silent=0, + opt_quick_mode=0; +static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1, + create_flag=0; +static ulong key_cache_size=IO_SIZE*16; +static uint key_cache_block_size= KEY_CACHE_BLOCK_SIZE; + +static uint keys=MARIA_KEYS,recant=1000; +static uint use_blob=0; +static uint16 key1[1001],key3[5000]; +static char record[300],record2[300],key[100],key2[100], + read_record[300],read_record2[300],read_record3[300]; +static HA_KEYSEG glob_keyseg[MARIA_KEYS][MAX_PARTS]; + + /* Test program */ + +int main(int argc, char *argv[]) +{ + uint i; + int j,n1,n2,n3,error,k; + uint write_count,update,dupp_keys,opt_delete,start,length,blob_pos, + reclength,ant,found_parts; + my_off_t lastpos; + ha_rows range_records,records; + MARIA_HA *file; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + MARIA_INFO info; + const char *filename; + char *blob_buffer; + MARIA_CREATE_INFO create_info; + MY_INIT(argv[0]); + + filename= "test2"; + get_options(argc,argv); + if (! async_io) + my_disable_async_io=1; + + maria_init(); + reclength=STANDARD_LENGTH+60+(use_blob ? 8 : 0); + blob_pos=STANDARD_LENGTH+60; + keyinfo[0].seg= &glob_keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=6; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].seg[0].flag=(uint8) pack_seg; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = pack_type; + keyinfo[1].seg= &glob_keyseg[1][0]; + keyinfo[1].seg[0].start=7; + keyinfo[1].seg[0].length=6; + keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[1].seg[0].flag=0; + keyinfo[1].seg[0].null_bit=0; + keyinfo[1].seg[0].null_pos=0; + keyinfo[1].seg[1].start=0; /* two part key */ + keyinfo[1].seg[1].length=6; + keyinfo[1].seg[1].type=HA_KEYTYPE_NUM; + keyinfo[1].seg[1].flag=HA_REVERSE_SORT; + keyinfo[1].seg[1].null_bit=0; + keyinfo[1].seg[1].null_pos=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=2; + keyinfo[1].flag =0; + keyinfo[2].seg= &glob_keyseg[2][0]; + keyinfo[2].seg[0].start=12; + keyinfo[2].seg[0].length=8; + keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[2].seg[0].flag=HA_REVERSE_SORT; + keyinfo[2].seg[0].null_bit=0; + keyinfo[2].seg[0].null_pos=0; + keyinfo[2].key_alg=HA_KEY_ALG_BTREE; + keyinfo[2].keysegs=1; + keyinfo[2].flag =HA_NOSAME; + keyinfo[3].seg= &glob_keyseg[3][0]; + keyinfo[3].seg[0].start=0; + keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0); + keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[3].seg[0].language=default_charset_info->number; + keyinfo[3].seg[0].flag=(uint8) pack_seg; + keyinfo[3].seg[0].null_bit=0; + keyinfo[3].seg[0].null_pos=0; + keyinfo[3].key_alg=HA_KEY_ALG_BTREE; + keyinfo[3].keysegs=1; + keyinfo[3].flag = pack_type; + keyinfo[4].seg= &glob_keyseg[4][0]; + keyinfo[4].seg[0].start=0; + keyinfo[4].seg[0].length=5; + keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[4].seg[0].language=default_charset_info->number; + keyinfo[4].seg[0].flag=0; + keyinfo[4].seg[0].null_bit=0; + keyinfo[4].seg[0].null_pos=0; + keyinfo[4].key_alg=HA_KEY_ALG_BTREE; + keyinfo[4].keysegs=1; + keyinfo[4].flag = pack_type; + keyinfo[5].seg= &glob_keyseg[5][0]; + keyinfo[5].seg[0].start=0; + keyinfo[5].seg[0].length=4; + keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[5].seg[0].language=default_charset_info->number; + keyinfo[5].seg[0].flag=pack_seg; + keyinfo[5].seg[0].null_bit=0; + keyinfo[5].seg[0].null_pos=0; + keyinfo[5].key_alg=HA_KEY_ALG_BTREE; + keyinfo[5].keysegs=1; + keyinfo[5].flag = pack_type; + + recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[0].length=7; + recinfo[0].null_bit=0; + recinfo[0].null_pos=0; + recinfo[1].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[1].length=5; + recinfo[1].null_bit=0; + recinfo[1].null_pos=0; + recinfo[2].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[2].length=9; + recinfo[2].null_bit=0; + recinfo[2].null_pos=0; + recinfo[3].type=FIELD_NORMAL; + recinfo[3].length=STANDARD_LENGTH-7-5-9-4; + recinfo[3].null_bit=0; + recinfo[3].null_pos=0; + recinfo[4].type=pack_fields ? FIELD_SKIP_ZERO : 0; + recinfo[4].length=4; + recinfo[4].null_bit=0; + recinfo[4].null_pos=0; + recinfo[5].type=pack_fields ? FIELD_SKIP_ENDSPACE : 0; + recinfo[5].length=60; + recinfo[5].null_bit=0; + recinfo[5].null_pos=0; + if (use_blob) + { + recinfo[6].type=FIELD_BLOB; + recinfo[6].length=4+maria_portable_sizeof_char_ptr; + recinfo[6].null_bit=0; + recinfo[6].null_pos=0; + } + + write_count=update=dupp_keys=opt_delete=0; + blob_buffer=0; + + for (i=1000 ; i>0 ; i--) key1[i]=0; + for (i=4999 ; i>0 ; i--) key3[i]=0; + + if (!silent) + printf("- Creating isam-file\n"); + /* DBUG_PUSH(""); */ + /* my_delete(filename,MYF(0)); */ /* Remove old locks under gdb */ + file= 0; + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=(ha_rows) (rec_pointer_size ? + (1L << (rec_pointer_size*8))/ + reclength : 0); + create_info.reloc_rows=(ha_rows) 100; + if (maria_create(filename,keys,&keyinfo[first_key], + use_blob ? 7 : 6, &recinfo[0], + 0,(MARIA_UNIQUEDEF*) 0, + &create_info,create_flag)) + goto err; + if (use_log) + maria_logging(1); + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + if (key_cacheing) + init_key_cache(maria_key_cache,key_cache_block_size,key_cache_size,0,0); + if (locking) + maria_lock_database(file,F_WRLCK); + if (write_cacheing) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + if (opt_quick_mode) + maria_extra(file,HA_EXTRA_QUICK,0); + + for (i=0 ; i < recant ; i++) + { + n1=rnd(1000); n2=rnd(100); n3=rnd(5000); + sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); + int4store(record+STANDARD_LENGTH-4,(long) i); + fix_length(record,(uint) STANDARD_LENGTH+rnd(60)); + put_blob_in_record(record+blob_pos,&blob_buffer); + DBUG_PRINT("test",("record: %d",i)); + + if (maria_write(file,record)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("Error: %d in write at record: %d\n",my_errno,i); + goto err; + } + if (verbose) printf(" Double key: %d\n",n3); + } + else + { + if (key3[n3] == 1 && first_key <3 && first_key+keys >= 3) + { + printf("Error: Didn't get error when writing second key: '%8d'\n",n3); + goto err; + } + write_count++; key1[n1]++; key3[n3]=1; + } + + /* Check if we can find key without flushing database */ + if (i == recant/2) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + if (!j) + for (j=999 ; j>0 && key1[j] == 0 ; j--) ; + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("Test in loop: Can't find key: \"%s\"\n",key); + goto err; + } + } + } + if (testflag==1) goto end; + + if (write_cacheing) + { + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + } + if (key_cacheing) + resize_key_cache(maria_key_cache,key_cache_block_size,key_cache_size*2,0,0); + + if (!silent) + printf("- Delete\n"); + for (i=0 ; i0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (maria_delete(file,read_record)) + { + printf("error: %d; can't delete record: \"%s\"\n", my_errno,read_record); + goto err; + } + opt_delete++; + key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + } + else + puts("Warning: Skipping delete test because no dupplicate keys"); + } + if (testflag==2) goto end; + + if (!silent) + printf("- Update\n"); + for (i=0 ; i0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (use_blob) + { + if (i & 1) + put_blob_in_record(record+blob_pos,&blob_buffer); + else + bmove(record+blob_pos,read_record+blob_pos,8); + } + if (maria_update(file,read_record,record2)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("error: %d; can't update:\nFrom: \"%s\"\nTo: \"%s\"\n", + my_errno,read_record,record2); + goto err; + } + if (verbose) + printf("Double key when tried to update:\nFrom: \"%s\"\nTo: \"%s\"\n",record,record2); + } + else + { + key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + key1[n1]++; key3[n3]=1; + update++; + } + } + } + if (testflag == 3) + goto end; + + for (i=999, dupp_keys=j=0 ; i>0 ; i--) + { + if (key1[i] > dupp_keys) + { + dupp_keys=key1[i]; j=i; + } + } + sprintf(key,"%6d",j); + start=keyinfo[0].seg[0].start; + length=keyinfo[0].seg[0].length; + if (dupp_keys) + { + if (!silent) + printf("- Same key: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("first - next -> last - prev -> first")); + if (verbose) printf(" Using key: \"%s\" Keys: %d\n",key,dupp_keys); + + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + if (maria_rsame(file,read_record2,-1)) + goto err; + if (memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame didn't find same record\n"); + goto end; + } + info.recpos=maria_position(file); + if (maria_rfirst(file,read_record2,0) || + maria_rsame_with_pos(file,read_record2,0,info.recpos) || + memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame_with_pos didn't find same record\n"); + goto end; + } + { + int skr=maria_rnext(file,read_record2,0); + if ((skr && my_errno != HA_ERR_END_OF_FILE) || + maria_rprev(file,read_record2,-1) || + memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame_with_pos lost position\n"); + goto end; + } + } + ant=1; + while (maria_rnext(file,read_record2,0) == 0 && + memcmp(read_record2+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("next: Found: %d keys of %d\n",ant,dupp_keys); + goto end; + } + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("prev: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + + /* Check of maria_rnext_same */ + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + ant=1; + while (!maria_rnext_same(file,read_record3) && ant < dupp_keys+10) + ant++; + if (ant != dupp_keys || my_errno != HA_ERR_END_OF_FILE) + { + printf("maria_rnext_same: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + } + + if (!silent) + printf("- All keys: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("All keys: first - next -> last - prev -> first")); + ant=1; + if (maria_rfirst(file,read_record,0)) + { + printf("Can't find first record\n"); + goto end; + } + while ((error=maria_rnext(file,read_record3,0)) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete || error != HA_ERR_END_OF_FILE) + { + printf("next: I found: %d records of %d (error: %d)\n", + ant, write_count - opt_delete, error); + goto end; + } + if (maria_rlast(file,read_record2,0) || + bcmp(read_record2,read_record3,reclength)) + { + printf("Can't find last record\n"); + DBUG_DUMP("record2",(byte*) read_record2,reclength); + DBUG_DUMP("record3",(byte*) read_record3,reclength); + goto end; + } + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete) + { + printf("prev: I found: %d records of %d\n",ant,write_count); + goto end; + } + if (bcmp(read_record,read_record3,reclength)) + { + printf("Can't find first record\n"); + goto end; + } + + if (!silent) + printf("- Test if: Read first - next - prev - prev - next == first\n"); + DBUG_PRINT("progpos",("- Read first - next - prev - prev - next == first")); + if (maria_rfirst(file,read_record,0) || + maria_rnext(file,read_record3,0) || + maria_rprev(file,read_record3,0) || + maria_rprev(file,read_record3,0) == 0 || + maria_rnext(file,read_record3,0)) + goto err; + if (bcmp(read_record,read_record3,reclength) != 0) + printf("Can't find first record\n"); + + if (!silent) + printf("- Test if: Read last - prev - next - next - prev == last\n"); + DBUG_PRINT("progpos",("Read last - prev - next - next - prev == last")); + if (maria_rlast(file,read_record2,0) || + maria_rprev(file,read_record3,0) || + maria_rnext(file,read_record3,0) || + maria_rnext(file,read_record3,0) == 0 || + maria_rprev(file,read_record3,0)) + goto err; + if (bcmp(read_record2,read_record3,reclength)) + printf("Can't find last record\n"); + + if (!silent) + puts("- Test read key-part"); + strmov(key2,key); + for(i=strlen(key2) ; i-- > 1 ;) + { + key2[i]=0; + + /* The following row is just to catch some bugs in the key code */ + bzero((char*) file->lastkey,file->s->base.max_key_length*2); + if (maria_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX)) + goto err; + if (bcmp(read_record+start,key,(uint) i)) + { + puts("Didn't find right record"); + goto end; + } + } + if (dupp_keys > 2) + { + if (!silent) + printf("- Read key (first) - next - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - next - delete - next -> last")); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_rnext(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-1) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1); + goto end; + } + } + if (dupp_keys>4) + { + if (!silent) + printf("- Read last of key - prev - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - prev - delete - prev -> first")); + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-2) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2); + goto end; + } + } + if (dupp_keys > 6) + { + if (!silent) + printf("- Read first - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - delete - next -> last")); + if (maria_rkey(file,read_record3,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + if (maria_rnext(file,read_record,0)) + goto err; /* Skall finnas poster */ + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-3) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3); + goto end; + } + + if (!silent) + printf("- Read last - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - delete - prev -> first")); + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-4) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4); + goto end; + } + } + + if (!silent) + puts("- Test if: Read rrnd - same"); + DBUG_PRINT("progpos",("Read rrnd - same")); + for (i=0 ; i < write_count ; i++) + { + if (maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR) == 0) + break; + } + if (i == write_count) + goto err; + + bmove(read_record2,read_record,reclength); + for (i=min(2,keys) ; i-- > 0 ;) + { + if (maria_rsame(file,read_record2,(int) i)) goto err; + if (bcmp(read_record,read_record2,reclength) != 0) + { + printf("is_rsame didn't find same record\n"); + goto end; + } + } + if (!silent) + puts("- Test maria_records_in_range"); + maria_status(file,&info,HA_STATUS_VARIABLE); + for (i=0 ; i < info.keys ; i++) + { + key_range min_key, max_key; + if (maria_rfirst(file,read_record,(int) i) || + maria_rlast(file,read_record2,(int) i)) + goto err; + copy_key(file,(uint) i,(uchar*) read_record,(uchar*) key); + copy_key(file,(uint) i,(uchar*) read_record2,(uchar*) key2); + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_KEY_EXACT; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_AFTER_KEY; + + range_records= maria_records_in_range(file,(int) i, &min_key, &max_key); + if (range_records < info.records*8/10 || + range_records > info.records*12/10) + { + printf("maria_records_range returned %ld; Should be about %ld\n", + (long) range_records,(long) info.records); + goto end; + } + if (verbose) + { + printf("maria_records_range returned %ld; Exact is %ld (diff: %4.2g %%)\n", + (long) range_records, (long) info.records, + labs((long) range_records - (long) info.records)*100.0/ + info.records); + } + } + for (i=0 ; i < 5 ; i++) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + for (k=rnd(1000)+1 ; k>0 && key1[k] == 0 ; k--) ; + if (j != 0 && k != 0) + { + key_range min_key, max_key; + if (j > k) + swap_variables(int, j, k); + sprintf(key,"%6d",j); + sprintf(key2,"%6d",k); + + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_AFTER_KEY; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_BEFORE_KEY; + range_records= maria_records_in_range(file, 0, &min_key, &max_key); + records=0; + for (j++ ; j < k ; j++) + records+=key1[j]; + if ((long) range_records < (long) records*7/10-2 || + (long) range_records > (long) records*14/10+2) + { + printf("maria_records_range for key: %d returned %lu; Should be about %lu\n", + i, (ulong) range_records, (ulong) records); + goto end; + } + if (verbose && records) + { + printf("maria_records_range returned %lu; Exact is %lu (diff: %4.2g %%)\n", + (ulong) range_records, (ulong) records, + labs((long) range_records-(long) records)*100.0/records); + + } + } + } + + if (!silent) + printf("- maria_info\n"); + maria_status(file,&info,HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (info.records != write_count-opt_delete || info.deleted > opt_delete + update + || info.keys != keys) + { + puts("Wrong info from maria_info"); + printf("Got: records: %lu delete: %lu i_keys: %d\n", + (ulong) info.records, (ulong) info.deleted, info.keys); + } + if (verbose) + { + char buff[80]; + get_date(buff,3,info.create_time); + printf("info: Created %s\n",buff); + get_date(buff,3,info.check_time); + printf("info: checked %s\n",buff); + get_date(buff,3,info.update_time); + printf("info: Modified %s\n",buff); + } + + maria_panic(HA_PANIC_WRITE); + maria_panic(HA_PANIC_READ); + if (maria_is_changed(file)) + puts("Warning: maria_is_changed reported that datafile was changed"); + + if (!silent) + printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n"); + if (maria_extra(file,HA_EXTRA_RESET,0) || maria_extra(file,HA_EXTRA_CACHE,0)) + { + if (locking || (!use_blob && !pack_fields)) + { + puts("got error from maria_extra(HA_EXTRA_CACHE)"); + goto end; + } + } + ant=0; + while ((error=maria_rrnd(file,record,HA_OFFSET_ERROR)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 0 : 1; + if (ant != write_count-opt_delete) + { + printf("rrnd with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + + ant=0; + maria_scan_init(file); + while ((error=maria_scan(file,record)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 0 : 1; + if (ant != write_count-opt_delete) + { + printf("scan with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + + if (testflag == 4) goto end; + + if (!silent) + printf("- Removing keys\n"); + DBUG_PRINT("progpos",("Removing keys")); + lastpos = HA_OFFSET_ERROR; + /* DBUG_POP(); */ + maria_extra(file,HA_EXTRA_RESET,0); + found_parts=0; + while ((error=maria_rrnd(file,read_record,HA_OFFSET_ERROR)) != + HA_ERR_END_OF_FILE) + { + info.recpos=maria_position(file); + if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR) + { + printf("maria_rrnd didn't advance filepointer; old: %ld, new: %ld\n", + (long) lastpos, (long) info.recpos); + goto err; + } + lastpos=info.recpos; + if (error == 0) + { + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (maria_rsame(file,read_record,-1)) + { + printf("can't find record %lx\n",(long) info.recpos); + goto err; + } + if (use_blob) + { + ulong blob_length,pos; + uchar *ptr; + longget(blob_length,read_record+blob_pos+4); + ptr=(uchar*) blob_length; + longget(blob_length,read_record+blob_pos); + for (pos=0 ; pos < blob_length ; pos++) + { + if (ptr[pos] != (uchar) (blob_length+pos)) + { + printf("found blob with wrong info at %ld\n",(long) lastpos); + use_blob=0; + break; + } + } + } + if (maria_delete(file,read_record)) + { + printf("can't delete record: %6.6s, delete_count: %d\n", + read_record, opt_delete); + goto err; + } + opt_delete++; + } + else + found_parts++; + } + if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED) + printf("error: %d from maria_rrnd\n",my_errno); + if (write_count != opt_delete) + { + printf("Deleted only %d of %d records (%d parts)\n",opt_delete,write_count, + found_parts); + goto err; + } +end: + if (maria_close(file)) + goto err; + maria_panic(HA_PANIC_CLOSE); /* Should close log */ + if (!silent) + { + printf("\nFollowing test have been made:\n"); + printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete); + if (rec_pointer_size) + printf("Record pointer size: %d\n",rec_pointer_size); + printf("maria_block_size: %lu\n", maria_block_size); + if (key_cacheing) + { + puts("Key cache used"); + printf("key_cache_block_size: %u\n", key_cache_block_size); + if (write_cacheing) + puts("Key cache resized"); + } + if (write_cacheing) + puts("Write cacheing used"); + if (write_cacheing) + puts("quick mode"); + if (async_io && locking) + puts("Asyncron io with locking used"); + else if (locking) + puts("Locking used"); + if (use_blob) + puts("blobs used"); + printf("key cache status: \n\ +blocks used:%10lu\n\ +not flushed:%10lu\n\ +w_requests: %10lu\n\ +writes: %10lu\n\ +r_requests: %10lu\n\ +reads: %10lu\n", + maria_key_cache->blocks_used, + maria_key_cache->global_blocks_changed, + (ulong) maria_key_cache->global_cache_w_requests, + (ulong) maria_key_cache->global_cache_write, + (ulong) maria_key_cache->global_cache_r_requests, + (ulong) maria_key_cache->global_cache_read); + } + end_key_cache(maria_key_cache,1); + if (blob_buffer) + my_free(blob_buffer,MYF(0)); + my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO); + return(0); +err: + printf("got error: %d when using MARIA-database\n",my_errno); + if (file) + VOID(maria_close(file)); + maria_end(); + return(1); +} /* main */ + + + /* l{ser optioner */ + /* OBS! intierar endast DEBUG - ingen debuggning h{r ! */ + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'B': + pack_type= HA_BINARY_PACK_KEY; + break; + case 'b': + use_blob=1; + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + if (*++pos) + key_cache_size=atol(pos); + break; + case 'W': /* Use write cacheing */ + write_cacheing=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'd': + remove_count= atoi(++pos); + break; + case 'i': + if (*++pos) + srand(atoi(pos)); + break; + case 'l': + use_log=1; + break; + case 'L': + locking=1; + break; + case 'A': /* use asyncron io */ + async_io=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'v': /* verbose */ + verbose=1; + break; + case 'm': /* records */ + if ((recant=atoi(++pos)) < 10) + { + fprintf(stderr,"record count must be >= 10\n"); + exit(1); + } + break; + case 'e': /* maria_block_length */ + if ((maria_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) + { + fprintf(stderr,"Wrong maria_block_length\n"); + exit(1); + } + maria_block_size=1 << my_bit_log2(maria_block_size); + break; + case 'E': /* maria_block_length */ + if ((key_cache_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + key_cache_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) + { + fprintf(stderr,"Wrong key_cache_block_size\n"); + exit(1); + } + key_cache_block_size=1 << my_bit_log2(key_cache_block_size); + break; + case 'f': + if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS) + first_key=0; + break; + case 'k': + if ((keys=(uint) atoi(++pos)) < 1 || + keys > (uint) (MARIA_KEYS-first_key)) + keys=MARIA_KEYS-first_key; + break; + case 'P': + pack_type=0; /* Don't use DIFF_LENGTH */ + pack_seg=0; + break; + case 'R': /* Length of record pointer */ + rec_pointer_size=atoi(++pos); + if (rec_pointer_size > 7) + rec_pointer_size=0; + break; + case 'S': + pack_fields=0; /* Static-length-records */ + break; + case 's': + silent=1; + break; + case 't': + testflag=atoi(++pos); /* testmod */ + break; + case 'q': + opt_quick_mode=1; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'D': + create_flag|=HA_CREATE_DELAY_KEY_WRITE; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.2 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + printf("Usage: %s [-?AbBcDIKLPRqSsVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n", + progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} /* get options */ + + /* Get a random value 0 <= x <= n */ + +static uint rnd(uint max_value) +{ + return (uint) ((rand() & 32767)/32767.0*max_value); +} /* rnd */ + + + /* Create a variable length record */ + +static void fix_length(byte *rec, uint length) +{ + bmove(rec+STANDARD_LENGTH, + "0123456789012345678901234567890123456789012345678901234567890", + length-STANDARD_LENGTH); + strfill(rec+length,STANDARD_LENGTH+60-length,' '); +} /* fix_length */ + + + /* Put maybe a blob in record */ + +static void put_blob_in_record(char *blob_pos, char **blob_buffer) +{ + ulong i,length; + if (use_blob) + { + if (rnd(10) == 0) + { + if (! *blob_buffer && + !(*blob_buffer=my_malloc((uint) use_blob,MYF(MY_WME)))) + { + use_blob=0; + return; + } + length=rnd(use_blob); + for (i=0 ; i < length ; i++) + (*blob_buffer)[i]=(char) (length+i); + int4store(blob_pos,length); + memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*)); + } + else + { + int4store(blob_pos,0); + } + } + return; +} + + +static void copy_key(MARIA_HA *info,uint inx,uchar *rec,uchar *key_buff) +{ + HA_KEYSEG *keyseg; + + for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++) + { + memcpy(key_buff,rec+keyseg->start,(size_t) keyseg->length); + key_buff+=keyseg->length; + } + return; +} diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c new file mode 100644 index 00000000000..bfb2c93a95f --- /dev/null +++ b/storage/maria/ma_test3.c @@ -0,0 +1,502 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av locking */ + +#ifndef __NETWARE__ + +#include "maria.h" +#include +#ifdef HAVE_SYS_WAIT_H +# include +#endif +#ifndef WEXITSTATUS +# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) +#endif +#ifndef WIFEXITED +# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) +#endif + + +#if defined(HAVE_LRAND48) +#define rnd(X) (lrand48() % X) +#define rnd_init(X) srand48(X) +#else +#define rnd(X) (random() % X) +#define rnd_init(X) srandom(X) +#endif + + +const char *filename= "test3"; +uint tests=10,forks=10,key_cacheing=0,use_log=0; + +static void get_options(int argc, char *argv[]); +void start_test(int id); +int test_read(MARIA_HA *,int),test_write(MARIA_HA *,int,int), + test_update(MARIA_HA *,int,int),test_rrnd(MARIA_HA *,int); + +struct record { + char id[8]; + char nr[4]; + char text[10]; +} record; + + +int main(int argc,char **argv) +{ + int status,wait_ret; + uint i=0; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + HA_KEYSEG keyseg[10][2]; + MY_INIT(argv[0]); + get_options(argc,argv); + + maria_init(); + bzero((char*) keyinfo,sizeof(keyinfo)); + bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) keyseg,sizeof(keyseg)); + keyinfo[0].seg= &keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=8; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag=HA_SPACE_PACK; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = (uint8) HA_PACK_KEY; + keyinfo[1].seg= &keyseg[1][0]; + keyinfo[1].seg[0].start=8; + keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */ + keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT; + keyinfo[1].seg[0].flag=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=1; + keyinfo[1].flag =HA_NOSAME; + + recinfo[0].type=0; + recinfo[0].length=sizeof(record.id); + recinfo[1].type=0; + recinfo[1].length=sizeof(record.nr); + recinfo[2].type=0; + recinfo[2].length=sizeof(record.text); + + puts("- Creating maria-file"); + my_delete(filename,MYF(0)); /* Remove old locks under gdb */ + if (maria_create(filename,2,&keyinfo[0],2,&recinfo[0],0,(MARIA_UNIQUEDEF*) 0, + (MARIA_CREATE_INFO*) 0,0)) + exit(1); + + rnd_init(0); + printf("- Starting %d processes\n",forks); fflush(stdout); + for (i=0 ; i < forks; i++) + { + if (!fork()) + { + start_test(i+1); + sleep(1); + return 0; + } + VOID(rnd(1)); + } + + for (i=0 ; i < forks ; i++) + while ((wait_ret=wait(&status)) && wait_ret == -1); + maria_end(); + return 0; +} + + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'l': + use_log=1; + break; + case 'f': + forks=atoi(++pos); + break; + case 't': + tests=atoi(++pos); + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + break; + case 'A': /* All flags */ + use_log=key_cacheing=1; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + puts("Test av locking with threads\n"); + printf("Usage: %s [-?lKA] [-f#] [-t#]\n",progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} + + +void start_test(int id) +{ + uint i; + int error,lock_type; + MARIA_INFO isam_info; + MARIA_HA *file,*file1,*file2=0,*lock; + + if (use_log) + maria_logging(1); + if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) || + !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) + { + fprintf(stderr,"Can't open isam-file: %s\n",filename); + exit(1); + } + if (key_cacheing && rnd(2) == 0) + init_key_cache(maria_key_cache, KEY_CACHE_BLOCK_SIZE, 65536L, 0, 0); + printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout); + + for (error=i=0 ; i < tests && !error; i++) + { + file= (rnd(2) == 1) ? file1 : file2; + lock=0 ; lock_type=0; + if (rnd(10) == 0) + { + if (maria_lock_database(lock=(rnd(2) ? file1 : file2), + lock_type=(rnd(2) == 0 ? F_RDLCK : F_WRLCK))) + { + fprintf(stderr,"%2d: start: Can't lock table %d\n",id,my_errno); + error=1; + break; + } + } + switch (rnd(4)) { + case 0: error=test_read(file,id); break; + case 1: error=test_rrnd(file,id); break; + case 2: error=test_write(file,id,lock_type); break; + case 3: error=test_update(file,id,lock_type); break; + } + if (lock) + maria_lock_database(lock,F_UNLCK); + } + if (!error) + { + maria_status(file1,&isam_info,HA_STATUS_VARIABLE); + printf("%2d: End of test. Records: %ld Deleted: %ld\n", + id,(long) isam_info.records, (long) isam_info.deleted); + fflush(stdout); + } + + maria_close(file1); + maria_close(file2); + if (use_log) + maria_logging(0); + if (error) + { + printf("%2d: Aborted\n",id); fflush(stdout); + exit(1); + } +} + + +int test_read(MARIA_HA *file,int id) +{ + uint i,lock,found,next,prev; + ulong find; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table %d\n",id,my_errno); + return 1; + } + } + + found=next=prev=0; + for (i=0 ; i < 100 ; i++) + { + find=rnd(100000); + if (!maria_rkey(file,record.id,1,(byte*) &find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in read\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n",id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n", + id,my_errno); + return 1; + } + } + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + return 1; + } + } + printf("%2d: read: found: %5d next: %5d prev: %5d\n", + id,found,next,prev); + fflush(stdout); + return 0; +} + + +int test_rrnd(MARIA_HA *file,int id) +{ + uint count,lock; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_CACHE,0); + } + + count=0; + if (maria_rrnd(file,record.id,0L)) + { + if (my_errno == HA_ERR_END_OF_FILE) + goto end; + fprintf(stderr,"%2d: Can't read first record (%d)\n",id,my_errno); + return 1; + } + for (count=1 ; !maria_rrnd(file,record.id,HA_OFFSET_ERROR) ;count++) ; + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rrnd\n",id,my_errno); + return 1; + } + +end: + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: rrnd: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_write(MARIA_HA *file,int id,int lock_type) +{ + uint i,tries,count,lock; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + } + + sprintf(record.id,"%7d",getpid()); + strnmov(record.text,"Testing...", sizeof(record.text)); + + tries=(uint) rnd(100)+10; + for (i=count=0 ; i < tries ; i++) + { + uint32 tmp=rnd(80000)+20000; + int4store(record.nr,tmp); + if (!maria_write(file,record.id)) + count++; + else + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d (errno %d) from write\n",id,my_errno, + errno); + return 1; + } + } + } + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: write: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_update(MARIA_HA *file,int id,int lock_type) +{ + uint i,lock,found,next,prev,update; + uint32 tmp; + char find[4]; + struct record new_record; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + return 1; + } + } + bzero((char*) &new_record,sizeof(new_record)); + strmov(new_record.text,"Updated"); + + found=next=prev=update=0; + for (i=0 ; i < 100 ; i++) + { + tmp=rnd(100000); + int4store(find,tmp); + if (!maria_rkey(file,record.id,1,(byte*) find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in update\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + continue; + } + } + } + memcpy_fixed(new_record.id,record.id,sizeof(record.id)); + tmp=rnd(20000)+40000; + int4store(new_record.nr,tmp); + if (!maria_update(file,record.id,new_record.id)) + update++; + else + { + if (my_errno != HA_ERR_RECORD_CHANGED && + my_errno != HA_ERR_RECORD_DELETED && + my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d from update\n",id,my_errno); + return 1; + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"Can't unlock table,id, error%d\n",my_errno); + return 1; + } + } + printf("%2d: update: %5d\n",id,update); fflush(stdout); + return 0; +} + +#else /* __NETWARE__ */ + +#include + +main() +{ + fprintf(stderr,"this test has not been ported to NetWare\n"); + return 0; +} + +#endif /* __NETWARE__ */ diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh new file mode 100755 index 00000000000..d848bc63b9a --- /dev/null +++ b/storage/maria/ma_test_all.sh @@ -0,0 +1,147 @@ +#!/bin/sh +# +# Execute some simple basic test on MyISAM libary to check if things +# works at all. + +valgrind="valgrind --alignment=8 --leak-check=yes" +silent="-s" + +if test -f ma_test1$MACH ; then suffix=$MACH else suffix=""; fi +ma_test1$suffix $silent +maria_chk$suffix -se test1 +ma_test1$suffix $silent -N -S +maria_chk$suffix -se test1 +ma_test1$suffix $silent -P --checksum +maria_chk$suffix -se test1 +ma_test1$suffix $silent -P -N -S +maria_chk$suffix -se test1 +ma_test1$suffix $silent -B -N -R2 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -k 480 --unique +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -N -S -R1 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -S +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -S -N --unique +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -S -N --key_length=127 --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -S -N --key_length=128 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -S --key_length=480 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -B +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -B --key_length=64 --unique +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -B -k 480 --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -m +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -m -P --unique --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -m -p +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -w -S --unique +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -w --key_length=64 --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -w -N --key_length=480 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -w -S --key_length=480 --checksum +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -b -N +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -a -b --key_length=480 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent -p -B --key_length=480 +maria_chk$suffix -sm test1 + +ma_test1$suffix $silent --checksum +maria_chk$suffix -se test1 +maria_chk$suffix -rs test1 +maria_chk$suffix -se test1 +maria_chk$suffix -rqs test1 +maria_chk$suffix -se test1 +maria_chk$suffix -rs --correct-checksum test1 +maria_chk$suffix -se test1 +maria_chk$suffix -rqs --correct-checksum test1 +maria_chk$suffix -se test1 +maria_chk$suffix -ros --correct-checksum test1 +maria_chk$suffix -se test1 +maria_chk$suffix -rqos --correct-checksum test1 +maria_chk$suffix -se test1 + +# check of maria_pack / maria_chk +maria_pack$suffix --force -s test1 +maria_chk$suffix -es test1 +maria_chk$suffix -rqs test1 +maria_chk$suffix -es test1 +maria_chk$suffix -rs test1 +maria_chk$suffix -es test1 +maria_chk$suffix -rus test1 +maria_chk$suffix -es test1 + +ma_test1$suffix $silent --checksum -S +maria_chk$suffix -se test1 +maria_chk$suffix -ros test1 +maria_chk$suffix -rqs test1 +maria_chk$suffix -se test1 + +maria_pack$suffix --force -s test1 +maria_chk$suffix -rqs test1 +maria_chk$suffix -es test1 +maria_chk$suffix -rus test1 +maria_chk$suffix -es test1 + +ma_test1$suffix $silent --checksum --unique +maria_chk$suffix -se test1 +ma_test1$suffix $silent --unique -S +maria_chk$suffix -se test1 + + +ma_test1$suffix $silent --key_multiple -N -S +maria_chk$suffix -sm test1 +ma_test1$suffix $silent --key_multiple -a -p --key_length=480 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent --key_multiple -a -B --key_length=480 +maria_chk$suffix -sm test1 +ma_test1$suffix $silent --key_multiple -P -S +maria_chk$suffix -sm test1 + +ma_test2$suffix $silent -L -K -W -P +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -L -K -W -P -A +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 +echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -L -K -R1 -m2000 +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -L -K -P -S -R3 -m50 -b1000000 +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -L -B +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -D -B -c +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -m10000 -e8192 -K +maria_chk$suffix -sm test2 +ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L +maria_chk$suffix -sm test2 + +ma_test2$suffix $silent -L -K -W -P -m50 -l +maria_log$suffix +ma_test2$suffix $silent -L -K -W -P -m50 -l -b100 +maria_log$suffix +time ma_test2$suffix $silent +time ma_test2$suffix $silent -K -B +time ma_test2$suffix $silent -L -B +time ma_test2$suffix $silent -L -K -B +time ma_test2$suffix $silent -L -K -W -B +time ma_test2$suffix $silent -L -K -W -S -B +time ma_test2$suffix $silent -D -K -W -S -B diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c new file mode 100644 index 00000000000..bc1aa71966b --- /dev/null +++ b/storage/maria/ma_unique.c @@ -0,0 +1,234 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to check if a row is unique */ + +#include "maria_def.h" +#include + +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, + ha_checksum unique_hash, my_off_t disk_pos) +{ + my_off_t lastpos=info->lastpos; + MARIA_KEYDEF *key= &info->s->keyinfo[def->key]; + uchar *key_buff=info->lastkey2; + DBUG_ENTER("_ma_check_unique"); + + maria_unique_store(record+key->seg->start, unique_hash); + _ma_make_key(info,def->key,key_buff,record,0); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (_ma_search(info,info->s->keyinfo+def->key,key_buff,MARIA_UNIQUE_HASH_LENGTH, + SEARCH_FIND,info->s->state.key_root[def->key])) + { + info->page_changed=1; /* Can't optimize read next */ + info->lastpos= lastpos; + DBUG_RETURN(0); /* No matching rows */ + } + + for (;;) + { + if (info->lastpos != disk_pos && + !(*info->s->compare_unique)(info,def,record,info->lastpos)) + { + my_errno=HA_ERR_FOUND_DUPP_UNIQUE; + info->errkey= (int) def->key; + info->dupp_key_pos= info->lastpos; + info->page_changed=1; /* Can't optimize read next */ + info->lastpos=lastpos; + DBUG_PRINT("info",("Found duplicate")); + DBUG_RETURN(1); /* Found identical */ + } + if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey, + MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, + info->s->state.key_root[def->key]) || + memcmp((char*) info->lastkey, (char*) key_buff, + MARIA_UNIQUE_HASH_LENGTH)) + { + info->page_changed=1; /* Can't optimize read next */ + info->lastpos=lastpos; + DBUG_RETURN(0); /* end of tree */ + } + } +} + + +/* + Calculate a hash for a row + + TODO + Add support for bit fields +*/ + +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *record) +{ + const byte *pos, *end; + ha_checksum crc= 0; + ulong seed1=0, seed2= 4; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + /* + Change crc in a way different from an empty string or 0. + (This is an optimisation; The code will work even if this isn't + done) + */ + crc=((crc << 8) + 511+ + (crc >> (8*sizeof(ha_checksum)-8))); + continue; + } + } + pos= record+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); + if (!length || length > tmp_length) + length=tmp_length; /* The whole blob */ + } + end= pos+length; + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + keyseg->charset->coll->hash_sort(keyseg->charset, + (const uchar*) pos, length, &seed1, + &seed2); + crc^= seed1; + } + else + while (pos != end) + crc=((crc << 8) + + (((uchar) *(uchar*) pos++))) + + (crc >> (8*sizeof(ha_checksum)-8)); + } + return crc; +} + + +/* + compare unique key for two rows + + TODO + Add support for bit fields + + RETURN + 0 if both rows have equal unique value + # Rows are different +*/ + +int _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal) +{ + const byte *pos_a, *pos_b, *end; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint a_length, b_length; + a_length= b_length= keyseg->length; + + /* If part is NULL it's regarded as different */ + if (keyseg->null_bit) + { + uint tmp; + if ((tmp=(a[keyseg->null_pos] & keyseg->null_bit)) != + (uint) (b[keyseg->null_pos] & keyseg->null_bit)) + return 1; + if (tmp) + { + if (!null_are_equal) + return 1; + continue; + } + } + pos_a= a+keyseg->start; + pos_b= b+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + if (pack_length == 1) + { + a_length= (uint) *(uchar*) pos_a++; + b_length= (uint) *(uchar*) pos_b++; + } + else + { + a_length= uint2korr(pos_a); + b_length= uint2korr(pos_b); + pos_a+= 2; /* Skip VARCHAR length */ + pos_b+= 2; + } + set_if_smaller(a_length, keyseg->length); /* Safety */ + set_if_smaller(b_length, keyseg->length); /* safety */ + } + else if (keyseg->flag & HA_BLOB_PART) + { + /* Only compare 'length' characters if length != 0 */ + a_length= _ma_calc_blob_length(keyseg->bit_start,pos_a); + b_length= _ma_calc_blob_length(keyseg->bit_start,pos_b); + /* Check that a and b are of equal length */ + if (keyseg->length) + { + /* + This is used in some cases when we are not interested in comparing + the whole length of the blob. + */ + set_if_smaller(a_length, keyseg->length); + set_if_smaller(b_length, keyseg->length); + } + memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*)); + memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*)); + } + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + (uchar *) pos_b, b_length, 0, 1)) + return 1; + } + else + { + if (a_length != b_length) + return 1; + end= pos_a+a_length; + while (pos_a != end) + { + if (*pos_a++ != *pos_b++) + return 1; + } + } + } + return 0; +} diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c new file mode 100644 index 00000000000..d3c71cef9b4 --- /dev/null +++ b/storage/maria/ma_update.c @@ -0,0 +1,232 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Update an old row in a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) +{ + int flag,key_changed,save_errno; + reg3 my_off_t pos; + uint i; + uchar old_key[HA_MAX_KEY_BUFF],*new_key; + bool auto_key_changed=0; + ulonglong changed; + MARIA_SHARE *share=info->s; + ha_checksum old_checksum; + DBUG_ENTER("maria_update"); + LINT_INIT(new_key); + LINT_INIT(changed); + LINT_INIT(old_checksum); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL); + } + pos=info->lastpos; + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + + if (share->calc_checksum) + old_checksum=info->checksum=(*share->calc_checksum)(info,oldrec); + if ((*share->compare_record)(info,oldrec)) + { + save_errno=my_errno; + goto err_end; /* Record has changed */ + } + + + /* Calculate and check all unique constraints */ + key_changed=0; + for (i=0 ; i < share->state.header.uniques ; i++) + { + MARIA_UNIQUEDEF *def=share->uniqueinfo+i; + if (_ma_unique_comp(def, newrec, oldrec,1) && + _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec), + info->lastpos)) + { + save_errno=my_errno; + goto err_end; + } + } + if (_ma_mark_file_changed(info)) + { + save_errno=my_errno; + goto err_end; + } + + /* Check which keys changed from the original row */ + + new_key=info->lastkey2; + changed=0; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_cmp(info,i,oldrec, newrec)) + { + if ((int) i == info->lastinx) + { + /* + We are changeing the index we are reading on. Mark that + the index data has changed and we need to do a full search + when doing read-next + */ + key_changed|=HA_STATE_WRITTEN; + } + changed|=((ulonglong) 1 << i); + if (_ma_ft_update(info,i,(char*) old_key,oldrec,newrec,pos)) + goto err; + } + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (new_length != old_length || + memcmp((byte*) old_key,(byte*) new_key,new_length)) + { + if ((int) i == info->lastinx) + key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */ + changed|=((ulonglong) 1 << i); + share->keyinfo[i].version++; + if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err; + if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err; + if (share->base.auto_key == i+1) + auto_key_changed=1; + } + } + } + } + /* + If we are running with external locking, we must update the index file + that something has changed. + */ + if (changed || !my_disable_locking) + key_changed|= HA_STATE_CHANGED; + + if (share->calc_checksum) + { + info->checksum=(*share->calc_checksum)(info,newrec); + /* Store new checksum in index file header */ + key_changed|= HA_STATE_CHANGED; + } + { + /* + Don't update index file if data file is not extended and no status + information changed + */ + MARIA_STATUS_INFO state; + ha_rows org_split; + my_off_t org_delete_link; + + memcpy((char*) &state, (char*) info->state, sizeof(state)); + org_split= share->state.split; + org_delete_link= share->state.dellink; + if ((*share->update_record)(info,pos,newrec)) + goto err; + if (!key_changed && + (memcmp((char*) &state, (char*) info->state, sizeof(state)) || + org_split != share->state.split || + org_delete_link != share->state.dellink)) + key_changed|= HA_STATE_CHANGED; /* Must update index file */ + } + if (auto_key_changed) + _ma_update_auto_increment(info,newrec); + if (share->calc_checksum) + info->state->checksum+=(info->checksum - old_checksum); + + info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | HA_STATE_AKTIV | + key_changed); + maria_log_record(MARIA_LOG_UPDATE,info,newrec,info->lastpos,0); + VOID(_ma_writeinfo(info,key_changed ? WRITEINFO_UPDATE_KEYFILE : 0)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + DBUG_PRINT("error",("key: %d errno: %d",i,my_errno)); + save_errno=my_errno; + if (changed) + key_changed|= HA_STATE_CHANGED; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL) + { + info->errkey= (int) i; + flag=0; + do + { + if (((ulonglong) 1 << i) & changed) + { + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if ((flag++ && _ma_ft_del(info,i,(char*) new_key,newrec,pos)) || + _ma_ft_add(info,i,(char*) old_key,oldrec,pos)) + break; + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + if ((flag++ && _ma_ck_delete(info,i,new_key,new_length)) || + _ma_ck_write(info,i,old_key,old_length)) + break; + } + } + } while (i-- != 0); + } + else + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | + key_changed); + + err_end: + maria_log_record(MARIA_LOG_UPDATE,info,newrec,info->lastpos,my_errno); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + save_errno=HA_ERR_CRASHED; + } + DBUG_RETURN(my_errno=save_errno); +} /* maria_update */ diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c new file mode 100644 index 00000000000..313a362f6d6 --- /dev/null +++ b/storage/maria/ma_write.c @@ -0,0 +1,1033 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Write a row to a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +#define MAX_POINTER_LENGTH 8 + + /* Functions declared in this file */ + +static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + uint comp_flag, uchar *key, + uint key_length, my_off_t pos, uchar *father_buff, + uchar *father_keypos, my_off_t father_page, + my_bool insert_last); +static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key, + uchar *curr_buff,uchar *father_buff, + uchar *father_keypos,my_off_t father_page); +static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint *return_key_length, + uchar **after_key); +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,uchar *key, + uint key_length); +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,uchar *key, + uint key_length); + + /* Write new record to database */ + +int maria_write(MARIA_HA *info, byte *record) +{ + MARIA_SHARE *share=info->s; + uint i; + int save_errno; + my_off_t filepos; + uchar *buff; + my_bool lock_tree= share->concurrent_insert; + DBUG_ENTER("maria_write"); + DBUG_PRINT("enter",("isam: %d data: %d",info->s->kfile,info->dfile)); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + dont_break(); /* Dont allow SIGHUP or SIGINT */ +#if !defined(NO_LOCKING) && defined(USE_RECORD_LOCK) + if (!info->locked && my_lock(info->dfile,F_WRLCK,0L,F_TO_EOF, + MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) + goto err; +#endif + filepos= ((share->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) ? + share->state.dellink : + info->state->data_file_length); + + if (share->base.reloc == (ha_rows) 1 && + share->base.records == (ha_rows) 1 && + info->state->records == (ha_rows) 1) + { /* System file */ + my_errno=HA_ERR_RECORD_FILE_FULL; + goto err2; + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + goto err2; + } + if (_ma_mark_file_changed(info)) + goto err2; + + /* Calculate and check all unique constraints */ + for (i=0 ; i < share->state.header.uniques ; i++) + { + if (_ma_check_unique(info,share->uniqueinfo+i,record, + _ma_unique_hash(share->uniqueinfo+i,record), + HA_OFFSET_ERROR)) + goto err2; + } + + /* Write all keys to indextree */ + + buff=info->lastkey2; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + { + rw_wrlock(&share->key_root_lock[i]); + share->keyinfo[i].version++; + } + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + else + { + if (share->keyinfo[i].ck_insert(info,i,buff, + _ma_make_key(info,i,buff,record, + filepos))) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + if (share->calc_checksum) + info->checksum=(*share->calc_checksum)(info,record); + if (!(info->opt_flag & OPT_NO_ROWS)) + { + if ((*share->write_record)(info,record)) + goto err; + info->state->checksum+=info->checksum; + } + if (share->base.auto_key) + _ma_update_auto_increment(info,record); + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN | + HA_STATE_ROW_CHANGED); + info->state->records++; + info->lastpos=filepos; + maria_log_record(MARIA_LOG_WRITE,info,record,filepos,0); + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + save_errno=my_errno; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL || + my_errno == HA_ERR_NULL_IN_SPATIAL) + { + if (info->bulk_insert) + { + uint j; + for (j=0 ; j < share->base.keys ; j++) + maria_flush_bulk_insert(info, j); + } + info->errkey= (int) i; + while ( i-- > 0) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + rw_wrlock(&share->key_root_lock[i]); + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + else + { + uint key_length= _ma_make_key(info,i,buff,record,filepos); + if (_ma_ck_delete(info,i,buff,key_length)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + } + else + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); + my_errno=save_errno; +err2: + save_errno=my_errno; + maria_log_record(MARIA_LOG_WRITE,info,record,filepos,my_errno); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); +} /* maria_write */ + + + /* Write one key to btree */ + +int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +{ + DBUG_ENTER("_ma_ck_write"); + + if (info->bulk_insert && is_tree_inited(&info->bulk_insert[keynr])) + { + DBUG_RETURN(_ma_ck_write_tree(info, keynr, key, key_length)); + } + else + { + DBUG_RETURN(_ma_ck_write_btree(info, keynr, key, key_length)); + } +} /* _ma_ck_write */ + + +/********************************************************************** + * Normal insert code * + **********************************************************************/ + +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key, + uint key_length) +{ + int error; + uint comp_flag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr; + my_off_t *root=&info->s->state.key_root[keynr]; + DBUG_ENTER("_ma_ck_write_btree"); + + if (keyinfo->flag & HA_SORT_ALLOWS_SAME) + comp_flag=SEARCH_BIGGER; /* Put after same key */ + else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT)) + { + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */ + if (keyinfo->flag & HA_NULL_ARE_EQUAL) + comp_flag|= SEARCH_NULL_ARE_EQUAL; + } + else + comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ + + error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, + root, comp_flag); + if (info->ft1_to_ft2) + { + if (!error) + error= _ma_ft_convert_to_ft2(info, keynr, key); + delete_dynamic(info->ft1_to_ft2); + my_free((gptr)info->ft1_to_ft2, MYF(0)); + info->ft1_to_ft2=0; + } + DBUG_RETURN(error); +} /* _ma_ck_write_btree */ + +int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, my_off_t *root, uint comp_flag) +{ + int error; + DBUG_ENTER("_ma_ck_real_write_btree"); + /* key_length parameter is used only if comp_flag is SEARCH_FIND */ + if (*root == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, + *root, (uchar *) 0, (uchar*) 0, + (my_off_t) 0, 1)) > 0) + error= _ma_enlarge_root(info,keyinfo,key,root); + DBUG_RETURN(error); +} /* _ma_ck_real_write_btree */ + + + /* Make a new root with key as only pointer */ + +int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + my_off_t *root) +{ + uint t_length,nod_flag; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_enlarge_root"); + + nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0; + _ma_kpointer(info,info->buff+2,*root); /* if nod */ + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar*) 0, + (uchar*) 0, (uchar*) 0, key,&s_temp); + maria_putint(info->buff,t_length+2+nod_flag,nod_flag); + (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp); + info->buff_used=info->page_changed=1; /* info->buff is used */ + if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_RETURN(0); +} /* _ma_enlarge_root */ + + + /* + Search after a position for a key and store it there + Returns -1 = error + 0 = ok + 1 = key should be stored in higher tree + */ + +static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, uchar *key, uint key_length, my_off_t page, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last) +{ + int error,flag; + uint nod_flag, search_key_length; + uchar *temp_buff,*keypos; + uchar keybuff[HA_MAX_KEY_BUFF]; + my_bool was_last_key; + my_off_t next_page, dupp_key_pos; + DBUG_ENTER("w_search"); + DBUG_PRINT("enter",("page: %ld",page)); + + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) + goto err; + + flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length, + comp_flag, &keypos, keybuff, &was_last_key); + nod_flag= _ma_test_if_nod(temp_buff); + if (flag == 0) + { + uint tmp_key_length; + /* get position to record with duplicated key */ + tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff); + if (tmp_key_length) + dupp_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length); + else + dupp_key_pos= HA_OFFSET_ERROR; + + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, keybuff); + subkeys=ft_sintXkorr(keybuff+off); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key, + USE_WHOLE_KEY, comp_flag, + &keypos, keybuff, &was_last_key); + } + else + { + /* popular word. two-level tree. going down */ + my_off_t root=dupp_key_pos; + keyinfo=&info->s->ft2_keyinfo; + get_key_full_length_rdonly(off, key); + key+=off; + keypos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + error= _ma_ck_real_write_btree(info, keyinfo, key, 0, + &root, comp_flag); + _ma_dpointer(info, keypos+HA_FT_WLEN, root); + subkeys--; /* should there be underflow protection ? */ + DBUG_ASSERT(subkeys < 0); + ft_intXstore(keypos, subkeys); + if (!error) + error= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff); + my_afree((byte*) temp_buff); + DBUG_RETURN(error); + } + } + else /* not HA_FULLTEXT, normal HA_NOSAME key */ + { + info->dupp_key_pos= dupp_key_pos; + my_afree((byte*) temp_buff); + my_errno=HA_ERR_FOUND_DUPP_KEY; + DBUG_RETURN(-1); + } + } + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + if (!was_last_key) + insert_last=0; + next_page= _ma_kpos(nod_flag,keypos); + if (next_page == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page, + temp_buff, keypos, page, insert_last)) >0) + { + error= _ma_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff, + father_keypos,father_page, insert_last); + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff)) + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(error); +err: + my_afree((byte*) temp_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* w_search */ + + +/* + Insert new key. + + SYNOPSIS + _ma_insert() + info Open table information. + keyinfo Key definition information. + key New key. + anc_buff Key page (beginning). + key_pos Position in key page where to insert. + key_buff Copy of previous key. + father_buff parent key page for balancing. + father_key_pos position in parent key page for balancing. + father_page position of parent key page in file. + insert_last If to append at end of page. + + DESCRIPTION + Insert new key at right of key_pos. + + RETURN + 2 if key contains key to upper level. + 0 OK. + < 0 Error. +*/ + +int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff, + uchar *father_buff, uchar *father_key_pos, my_off_t father_page, + my_bool insert_last) +{ + uint a_length,nod_flag; + int t_length; + uchar *endpos, *prev_key; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("_ma_insert"); + DBUG_PRINT("enter",("key_pos: %lx",key_pos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key, + USE_WHOLE_KEY);); + + nod_flag=_ma_test_if_nod(anc_buff); + a_length=maria_getint(anc_buff); + endpos= anc_buff+ a_length; + prev_key=(key_pos == anc_buff+2+nod_flag ? (uchar*) 0 : key_buff); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (key_pos == endpos ? (uchar*) 0 : key_pos), + prev_key, prev_key, + key,&s_temp); +#ifndef DBUG_OFF + if (key_pos != anc_buff+2+nod_flag && (keyinfo->flag & + (HA_BINARY_PACK_KEY | HA_PACK_KEY))) + { + DBUG_DUMP("prev_key",(byte*) key_buff, _ma_keylength(keyinfo,key_buff)); + } + if (keyinfo->flag & HA_PACK_KEY) + { + DBUG_PRINT("test",("t_length: %d ref_len: %d", + t_length,s_temp.ref_length)); + DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: %lx", + s_temp.n_ref_length,s_temp.n_length,s_temp.key)); + } +#endif + if (t_length > 0) + { + if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove_upp((byte*) endpos+t_length,(byte*) endpos,(uint) (endpos-key_pos)); + } + else + { + if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length); + } + (*keyinfo->store_key)(keyinfo,key_pos,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + if (a_length <= keyinfo->block_length) + { + if (keyinfo->block_length - a_length < 32 && + keyinfo->flag & HA_FULLTEXT && key_pos == endpos && + info->s->base.key_reflength <= info->s->base.rec_reflength && + info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) + { + /* + Normal word. One-level tree. Page is almost full. + Let's consider converting. + We'll compare 'key' and the first key at anc_buff + */ + uchar *a=key, *b=anc_buff+2+nod_flag; + uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; + /* the very first key on the page is always unpacked */ + DBUG_ASSERT((*b & 128) == 0); +#if HA_FT_MAXLEN >= 127 + blen= mi_uint2korr(b); b+=2; +#else + blen= *b++; +#endif + get_key_length(alen,a); + DBUG_ASSERT(info->ft1_to_ft2==0); + if (alen == blen && + ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) + { + /* yup. converting */ + info->ft1_to_ft2=(DYNAMIC_ARRAY *) + my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME)); + my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50); + + /* + now, adding all keys from the page to dynarray + if the page is a leaf (if not keys will be deleted later) + */ + if (!nod_flag) + { + /* let's leave the first key on the page, though, because + we cannot easily dispatch an empty page here */ + b+=blen+ft2len+2; + for (a=anc_buff+a_length ; b < a ; b+=ft2len+2) + insert_dynamic(info->ft1_to_ft2, (char*) b); + + /* fixing the page's length - it contains only one key now */ + maria_putint(anc_buff,2+blen+ft2len+2,0); + } + /* the rest will be done when we're back from recursion */ + } + } + DBUG_RETURN(0); /* There is room on page */ + } + /* Page is full */ + if (nod_flag) + insert_last=0; + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) && + father_buff && !insert_last) + DBUG_RETURN(_ma_balance_page(info,keyinfo,key,anc_buff,father_buff, + father_key_pos,father_page)); + DBUG_RETURN(_ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last)); +} /* _ma_insert */ + + + /* split a full page in two and assign emerging item to key */ + +int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uchar *key, uchar *buff, uchar *key_buff, + my_bool insert_last_key) +{ + uint length,a_length,key_ref_length,t_length,nod_flag,key_length; + uchar *key_pos,*pos, *after_key; + my_off_t new_pos; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("maria_split_page"); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); + + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; /* Info->buff is used */ + info->buff_used=1; + nod_flag=_ma_test_if_nod(buff); + key_ref_length=2+nod_flag; + if (insert_last_key) + key_pos= _ma_find_last_pos(keyinfo,buff,key_buff, &key_length, &after_key); + else + key_pos= _ma_find_half_pos(nod_flag,keyinfo,buff,key_buff, &key_length, + &after_key); + if (!key_pos) + DBUG_RETURN(-1); + + length=(uint) (key_pos-buff); + a_length=maria_getint(buff); + maria_putint(buff,length,nod_flag); + + key_pos=after_key; + if (nod_flag) + { + DBUG_PRINT("test",("Splitting nod")); + pos=key_pos-nod_flag; + memcpy((byte*) info->buff+2,(byte*) pos,(size_t) nod_flag); + } + + /* Move middle item to key and pointer to new page */ + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(-1); + _ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos); + + /* Store new page */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff)) + DBUG_RETURN(-1); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0, + (uchar*) 0, (uchar*) 0, + key_buff, &s_temp); + length=(uint) ((buff+a_length)-key_pos); + memcpy((byte*) info->buff+key_ref_length+t_length,(byte*) key_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp); + maria_putint(info->buff,length+t_length+key_ref_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_DUMP("key",(byte*) key, _ma_keylength(keyinfo,key)); + DBUG_RETURN(2); /* Middle key up */ +} /* _ma_split_page */ + + + /* + Calculate how to much to move to split a page in two + Returns pointer to start of key. + key will contain the key. + return_key_length will contain the length of key + after_key will contain the position to where the next key starts + */ + +uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint *return_key_length, + uchar **after_key) +{ + uint keys,length,key_ref_length; + uchar *end,*lastpos; + DBUG_ENTER("_ma_find_half_pos"); + + key_ref_length=2+nod_flag; + length=maria_getint(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + key_ref_length=keyinfo->keylength+nod_flag; + keys=length/(key_ref_length*2); + *return_key_length=keyinfo->keylength; + end=page+keys*key_ref_length; + *after_key=end+key_ref_length; + memcpy(key,end,key_ref_length); + DBUG_RETURN(end); + } + + end=page+length/2-key_ref_length; /* This is aprox. half */ + *key='\0'; + do + { + lastpos=page; + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key))) + DBUG_RETURN(0); + } while (page < end); + *return_key_length=length; + *after_key=page; + DBUG_PRINT("exit",("returns: %lx page: %lx half: %lx",lastpos,page,end)); + DBUG_RETURN(lastpos); +} /* _ma_find_half_pos */ + + + /* + Split buffer at last key + Returns pointer to the start of the key before the last key + key will contain the last key + */ + +static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, + uchar *key, uint *return_key_length, + uchar **after_key) +{ + uint keys,length,last_length,key_ref_length; + uchar *end,*lastpos,*prevpos; + uchar key_buff[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_find_last_pos"); + + key_ref_length=2; + length=maria_getint(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + keys=length/keyinfo->keylength-2; + *return_key_length=length=keyinfo->keylength; + end=page+keys*length; + *after_key=end+length; + memcpy(key,end,length); + DBUG_RETURN(end); + } + + LINT_INIT(prevpos); + LINT_INIT(last_length); + end=page+length-key_ref_length; + *key='\0'; + length=0; + lastpos=page; + while (page < end) + { + prevpos=lastpos; lastpos=page; + last_length=length; + memcpy(key, key_buff, length); /* previous key */ + if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff))) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + *return_key_length=last_length; + *after_key=lastpos; + DBUG_PRINT("exit",("returns: %lx page: %lx end: %lx",prevpos,page,end)); + DBUG_RETURN(prevpos); +} /* _ma_find_last_pos */ + + + /* Balance page with not packed keys with page on right/left */ + /* returns 0 if balance was done */ + +static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uchar *curr_buff, uchar *father_buff, + uchar *father_key_pos, my_off_t father_page) +{ + my_bool right; + uint k_length,father_length,father_keylength,nod_flag,curr_keylength, + right_length,left_length,new_right_length,new_left_length,extra_length, + length,keys; + uchar *pos,*buff,*extra_buff; + my_off_t next_page,new_pos; + byte tmp_part_key[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_balance_page"); + + k_length=keyinfo->keylength; + father_length=maria_getint(father_buff); + father_keylength=k_length+info->s->base.key_reflength; + nod_flag=_ma_test_if_nod(curr_buff); + curr_keylength=k_length+nod_flag; + info->page_changed=1; + + if ((father_key_pos != father_buff+father_length && + (info->state->records & 1)) || + father_key_pos == father_buff+2+info->s->base.key_reflength) + { + right=1; + next_page= _ma_kpos(info->s->base.key_reflength, + father_key_pos+father_keylength); + buff=info->buff; + DBUG_PRINT("test",("use right page: %lu",next_page)); + } + else + { + right=0; + father_key_pos-=father_keylength; + next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos); + /* Fix that curr_buff is to left */ + buff=curr_buff; curr_buff=info->buff; + DBUG_PRINT("test",("use left page: %lu",next_page)); + } /* father_key_pos ptr to parting key */ + + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0)) + goto err; + DBUG_DUMP("next",(byte*) info->buff,maria_getint(info->buff)); + + /* Test if there is room to share keys */ + + left_length=maria_getint(curr_buff); + right_length=maria_getint(buff); + keys=(left_length+right_length-4-nod_flag*2)/curr_keylength; + + if ((right ? right_length : left_length) + curr_keylength <= + keyinfo->block_length) + { /* Merge buffs */ + new_left_length=2+nod_flag+(keys/2)*curr_keylength; + new_right_length=2+nod_flag+((keys+1)/2)*curr_keylength; + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + + if (left_length < new_left_length) + { /* Move keys buff -> leaf */ + pos=curr_buff+left_length; + memcpy((byte*) pos,(byte*) father_key_pos, (size_t) k_length); + memcpy((byte*) pos+k_length, (byte*) buff+2, + (size_t) (length=new_left_length - left_length - k_length)); + pos=buff+2+length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + bmove((byte*) buff+2,(byte*) pos+k_length,new_right_length); + } + else + { /* Move keys -> buff */ + + bmove_upp((byte*) buff+new_right_length,(byte*) buff+right_length, + right_length-2); + length=new_right_length-right_length-k_length; + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + pos=curr_buff+new_left_length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + memcpy((byte*) buff+2,(byte*) pos+k_length,(size_t) length); + } + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff)) + goto err; + DBUG_RETURN(0); + } + + /* curr_buff[] and buff[] are full, lets split and make new nod */ + + extra_buff=info->buff+info->s->base.max_key_block_length; + new_left_length=new_right_length=2+nod_flag+(keys+1)/3*curr_keylength; + if (keys == 5) /* Too few keys to balance */ + new_left_length-=curr_keylength; + extra_length=nod_flag+left_length+right_length- + new_left_length-new_right_length-curr_keylength; + DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d", + left_length, right_length, + new_left_length, new_right_length, + extra_length)); + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + maria_putint(extra_buff,extra_length+2,nod_flag); + + /* move first largest keys to new page */ + pos=buff+right_length-extra_length; + memcpy((byte*) extra_buff+2,pos,(size_t) extra_length); + /* Save new parting key */ + memcpy(tmp_part_key, pos-k_length,k_length); + /* Make place for new keys */ + bmove_upp((byte*) buff+new_right_length,(byte*) pos-k_length, + right_length-extra_length-k_length-2); + /* Copy keys from left page */ + pos= curr_buff+new_left_length; + memcpy((byte*) buff+2,(byte*) pos+k_length, + (size_t) (length=left_length-new_left_length-k_length)); + /* Copy old parting key */ + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + + /* Move new parting keys up to caller */ + memcpy((byte*) (right ? key : father_key_pos),pos,(size_t) k_length); + memcpy((byte*) (right ? father_key_pos : key),tmp_part_key, k_length); + + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + goto err; + _ma_kpointer(info,key+k_length,new_pos); + if (_ma_write_keypage(info,keyinfo,(right ? new_pos : next_page), + DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,(right ? next_page : new_pos), + DFLT_INIT_HITS,extra_buff)) + goto err; + + DBUG_RETURN(1); /* Middle key up */ + +err: + DBUG_RETURN(-1); +} /* _ma_balance_page */ + +/********************************************************************** + * Bulk insert code * + **********************************************************************/ + +typedef struct { + MARIA_HA *info; + uint keynr; +} bulk_insert_param; + +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key, + uint key_length) +{ + int error; + DBUG_ENTER("_ma_ck_write_tree"); + + error= tree_insert(&info->bulk_insert[keynr], key, + key_length + info->s->rec_reflength, + info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ; + + DBUG_RETURN(error); +} /* _ma_ck_write_tree */ + + +/* typeof(_ma_keys_compare)=qsort_cmp2 */ + +static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2) +{ + uint not_used[2]; + return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg, + key1, key2, USE_WHOLE_KEY, SEARCH_SAME, + not_used); +} + + +static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) +{ + /* + Probably I can use info->lastkey here, but I'm not sure, + and to be safe I'd better use local lastkey. + */ + uchar lastkey[HA_MAX_KEY_BUFF]; + uint keylen; + MARIA_KEYDEF *keyinfo; + + switch (mode) { + case free_init: + if (param->info->s->concurrent_insert) + { + rw_wrlock(¶m->info->s->key_root_lock[param->keynr]); + param->info->s->keyinfo[param->keynr].version++; + } + return 0; + case free_free: + keyinfo=param->info->s->keyinfo+param->keynr; + keylen= _ma_keylength(keyinfo, key); + memcpy(lastkey, key, keylen); + return _ma_ck_write_btree(param->info,param->keynr,lastkey, + keylen - param->info->s->rec_reflength); + case free_end: + if (param->info->s->concurrent_insert) + rw_unlock(¶m->info->s->key_root_lock[param->keynr]); + return 0; + } + return -1; +} + + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + bulk_insert_param *params; + uint i, num_keys, total_keylength; + ulonglong key_map; + DBUG_ENTER("_ma_init_bulk_insert"); + DBUG_PRINT("enter",("cache_size: %lu", cache_size)); + + DBUG_ASSERT(!info->bulk_insert && + (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT)); + + maria_clear_all_keys_active(key_map); + for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++) + { + if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) && + maria_is_key_active(share->state.key_map, i)) + { + num_keys++; + maria_set_key_active(key_map, i); + total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE; + } + } + + if (num_keys==0 || + num_keys * MARIA_MIN_SIZE_BULK_INSERT_TREE > cache_size) + DBUG_RETURN(0); + + if (rows && rows*total_keylength < cache_size) + cache_size=rows; + else + cache_size/=total_keylength*16; + + info->bulk_insert=(TREE *) + my_malloc((sizeof(TREE)*share->base.keys+ + sizeof(bulk_insert_param)*num_keys),MYF(0)); + + if (!info->bulk_insert) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + params=(bulk_insert_param *)(info->bulk_insert+share->base.keys); + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(key_map, i)) + { + params->info=info; + params->keynr=i; + /* Only allocate a 16'th of the buffer at a time */ + init_tree(&info->bulk_insert[i], + cache_size * key[i].maxlength, + cache_size * key[i].maxlength, 0, + (qsort_cmp2)keys_compare, 0, + (tree_element_free) keys_free, (void *)params++); + } + else + info->bulk_insert[i].root=0; + } + + DBUG_RETURN(0); +} + +void maria_flush_bulk_insert(MARIA_HA *info, uint inx) +{ + if (info->bulk_insert) + { + if (is_tree_inited(&info->bulk_insert[inx])) + reset_tree(&info->bulk_insert[inx]); + } +} + +void maria_end_bulk_insert(MARIA_HA *info) +{ + if (info->bulk_insert) + { + uint i; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (is_tree_inited(& info->bulk_insert[i])) + { + delete_tree(& info->bulk_insert[i]); + } + } + my_free((void *)info->bulk_insert, MYF(0)); + info->bulk_insert=0; + } +} diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c new file mode 100644 index 00000000000..de76344a800 --- /dev/null +++ b/storage/maria/maria_chk.c @@ -0,0 +1,1824 @@ +/* Copyright (C) 2006-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +#include "ma_fulltext.h" +#include +#include +#include +#include +#ifdef HAVE_SYS_VADVICE_H +#include +#endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif +SET_STACK_SIZE(9000) /* Minimum stack size for program */ + +#ifndef USE_RAID +#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) +#define my_raid_delete(A,B,C) my_delete(A,B) +#endif + +#ifdef OS2 +#define _sanity(a,b) +#endif + +static uint decode_bits; +static char **default_argv; +static const char *load_default_groups[]= { "mariachk", 0 }; +static const char *set_collation_name, *opt_tmpdir; +static CHARSET_INFO *set_collation; +static long opt_maria_block_size; +static long opt_key_cache_block_size; +static const char *my_progname_short; +static int stopwords_inited= 0; +static MY_TMPDIR mariachk_tmpdir; + +static const char *type_names[]= +{ "impossible","char","binary", "short", "long", "float", + "double","number","unsigned short", + "unsigned long","longlong","ulonglong","int24", + "uint24","int8","varchar", "varbin","?", + "?"}; + +static const char *prefix_packed_txt="packed ", + *bin_packed_txt="prefix ", + *diff_txt="stripped ", + *null_txt="NULL", + *blob_txt="BLOB "; + +static const char *field_pack[]= +{"","no endspace", "no prespace", + "no zeros", "blob", "constant", "table-lockup", + "always zero","varchar","unique-hash","?","?"}; + +static const char *maria_stats_method_str="nulls_unequal"; + +static void get_options(int *argc,char * * *argv); +static void print_version(void); +static void usage(void); +static int mariachk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name); +static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info, + my_string name, uint sort_key, + my_bool write_info, my_bool update_index); +static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page,uchar *buff,uint sortkey, + File new_file, my_bool update_index); + +HA_CHECK check_param; + + /* Main program */ + +int main(int argc, char **argv) +{ + int error; + MY_INIT(argv[0]); + my_progname_short= my_progname+dirname_length(my_progname); + +#ifdef __EMX__ + _wildcard (&argc, &argv); +#endif + + mariachk_init(&check_param); + check_param.opt_lock_memory= 1; /* Lock memory if possible */ + check_param.using_global_keycache = 0; + get_options(&argc,(char***) &argv); + maria_quick_table_bits=decode_bits; + error=0; + maria_init(); + + while (--argc >= 0) + { + int new_error=mariachk(&check_param, *(argv++)); + if ((check_param.testflag & T_REP_ANY) != T_REP) + check_param.testflag&= ~T_REP; + VOID(fflush(stdout)); + VOID(fflush(stderr)); + if ((check_param.error_printed | check_param.warning_printed) && + (check_param.testflag & T_FORCE_CREATE) && + (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS | + T_SORT_INDEX)))) + { + uint old_testflag=check_param.testflag; + if (!(check_param.testflag & T_REP)) + check_param.testflag|= T_REP_BY_SORT; + check_param.testflag&= ~T_EXTEND; /* Don't needed */ + error|=mariachk(&check_param, argv[-1]); + check_param.testflag= old_testflag; + VOID(fflush(stdout)); + VOID(fflush(stderr)); + } + else + error|=new_error; + if (argc && (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO)) + { + puts("\n---------\n"); + VOID(fflush(stdout)); + } + } + if (check_param.total_files > 1) + { /* Only if descript */ + char buff[22],buff2[22]; + if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO) + puts("\n---------\n"); + printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff), + llstr(check_param.total_deleted,buff2)); + } + free_defaults(default_argv); + free_tmpdir(&mariachk_tmpdir); + maria_end(); + my_end(check_param.testflag & T_INFO ? + MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} /* main */ + +enum options_mc { + OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS, + OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, + OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE, + OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE, + OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, + OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE, + OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD +}; + +static struct my_option my_long_options[] = +{ + {"analyze", 'a', + "Analyze distribution of keys. Will make some joins in MySQL faster. You can check the calculated distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"block-search", 'b', + "No help available.", + 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"backup", 'B', + "Make a backup of the .MYD file as 'filename-time.BAK'.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR, + "Directory where character sets are.", + (gptr*) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"check", 'c', + "Check table for errors.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"check-only-changed", 'C', + "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"correct-checksum", OPT_CORRECT_CHECKSUM, + "Correct checksum information for table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', + "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"description", 'd', + "Prints some information about table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"data-file-length", 'D', + "Max length of data file (when recreating data-file when it's full).", + (gptr*) &check_param.max_data_file_length, + (gptr*) &check_param.max_data_file_length, + 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"extend-check", 'e', + "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"fast", 'F', + "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Restart with -r if there are any errors in the table. States will be updated as with --update-state.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"HELP", 'H', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"information", 'i', + "Print statistics information about table that is checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"keys-used", 'k', + "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", + (gptr*) &check_param.keys_in_use, + (gptr*) &check_param.keys_in_use, + 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, + {"max-record-length", OPT_MAX_RECORD_LENGTH, + "Skip rows bigger than this if mariachk can't allocate memory to hold it", + (gptr*) &check_param.max_record_length, + (gptr*) &check_param.max_record_length, + 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, + {"medium-check", 'm', + "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"quick", 'q', "Faster repair by not modifying the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"read-only", 'T', + "Don't mark table as checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"recover", 'r', + "Can fix almost anything except unique keys that aren't unique.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"parallel-recover", 'p', + "Same as '-r' but creates all the keys in parallel.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"safe-recover", 'o', + "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-recover", 'n', + "Force recovering with sorting even if the temporary file was very big.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef DEBUG + {"start-check-pos", OPT_START_CHECK_POS, + "No help available.", + 0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"set-auto-increment", 'A', + "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", + (gptr*) &check_param.auto_increment_value, + (gptr*) &check_param.auto_increment_value, + 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"set-collation", OPT_SET_COLLATION, + "Change the collation used by the index", + (gptr*) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"set-variable", 'O', + "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', + "Only print errors. One can use two -s to make mariachk very silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-index", 'S', + "Sort index blocks. This speeds up 'read-next' in applications.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-records", 'R', + "Sort records according to an index. This makes your data much more localized and may speed up things. (It may be VERY slow to do a sort the first time!)", + (gptr*) &check_param.opt_sort_key, + (gptr*) &check_param.opt_sort_key, + 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 't', + "Path for temporary files.", + (gptr*) &opt_tmpdir, + 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"update-state", 'U', + "Mark tables as crashed if any errors were found.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unpack", 'u', + "Unpack file packed with mariapack.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', + "Print more information. This can be used with --description and --check. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', + "Print version and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', + "Wait if table is locked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "", + (gptr*) &check_param.use_buffers, (gptr*) &check_param.use_buffers, 0, + GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, + { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE, "", + (gptr*) &opt_key_cache_block_size, + (gptr*) &opt_key_cache_block_size, 0, + GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + { "maria_block_size", OPT_MARIA_BLOCK_SIZE, "", + (gptr*) &opt_maria_block_size, (gptr*) &opt_maria_block_size, 0, + GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + { "read_buffer_size", OPT_READ_BUFFER_SIZE, "", + (gptr*) &check_param.read_buffer_length, + (gptr*) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "", + (gptr*) &check_param.write_buffer_length, + (gptr*) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "", + (gptr*) &check_param.sort_buffer_length, + (gptr*) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD), + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "", + (gptr*) &check_param.sort_key_blocks, + (gptr*) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, + BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0}, + { "decode_bits", OPT_DECODE_BITS, "", (gptr*) &decode_bits, + (gptr*) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, + { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (gptr*) &ft_min_word_len, + (gptr*) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, + 0, 1, 0}, + { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len, + (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, + HA_FT_MAXCHARLEN, 0, 1, 0}, + { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE, + "Use stopwords from this file instead of built-in list.", + (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"stats_method", OPT_STATS_METHOD, + "Specifies how index statistics collection code should threat NULLs. " + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +#include + +static void print_version(void) +{ + printf("%s Ver 2.7 for %s at %s\n", my_progname, SYSTEM_TYPE, + MACHINE_TYPE); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("By Monty, for your professional use"); + puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n"); + puts("Description, check and repair of MARIA tables."); + puts("Used without options all tables on the command will be checked for errors"); + printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short); + printf("\nGlobal options:\n"); +#ifndef DBUG_OFF + printf("\ + -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n"); +#endif + printf("\ + -?, --help Display this help and exit.\n\ + -O, --set-variable var=option.\n\ + Change the value of a variable. Please note that\n\ + this option is deprecated; you can set variables\n\ + directly with '--variable-name=value'.\n\ + -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\ + specified, separated by "); +#if defined( __WIN__) || defined(OS2) || defined(__NETWARE__) + printf("semicolon (;)"); +#else + printf("colon (:)"); +#endif + printf(", they will be used\n\ + in a round-robin fashion.\n\ + -s, --silent Only print errors. One can use two -s to make\n\ + mariachk very silent.\n\ + -v, --verbose Print more information. This can be used with\n\ + --description and --check. Use many -v for more verbosity.\n\ + -V, --version Print version and exit.\n\ + -w, --wait Wait if table is locked.\n\n"); +#ifdef DEBUG + puts(" --start-check-pos=# Start reading file at given offset.\n"); +#endif + + puts("Check options (check is the default action for mariachk):\n\ + -c, --check Check table for errors.\n\ + -e, --extend-check Check the table VERY throughly. Only use this in\n\ + extreme cases as mariachk should normally be able to\n\ + find out if the table is ok even without this switch.\n\ + -F, --fast Check only tables that haven't been closed properly.\n\ + -C, --check-only-changed\n\ + Check only tables that have changed since last check.\n\ + -f, --force Restart with '-r' if there are any errors in the table.\n\ + States will be updated as with '--update-state'.\n\ + -i, --information Print statistics information about table that is checked.\n\ + -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ + all errors. Should be good enough for most cases.\n\ + -U --update-state Mark tables as crashed if you find any errors.\n\ + -T, --read-only Don't mark table as checked.\n"); + + puts("Repair options (When using '-r' or '-o'):\n\ + -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\ + --correct-checksum Correct checksum information for table.\n\ + -D, --data-file-length=# Max length of data file (when recreating data\n\ + file when it's full).\n\ + -e, --extend-check Try to recover every possible row from the data file\n\ + Normally this will also find a lot of garbage rows;\n\ + Don't use this option if you are not totally desperate.\n\ + -f, --force Overwrite old temporary files.\n\ + -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\ + bit mask of which keys to use. This can be used to\n\ + get faster inserts.\n\ + --max-record-length=#\n\ + Skip rows bigger than this if mariachk can't allocate\n\ + memory to hold it.\n\ + -r, --recover Can fix almost anything except unique keys that aren't\n\ + unique.\n\ + -n, --sort-recover Forces recovering with sorting even if the temporary\n\ + file would be very big.\n\ + -p, --parallel-recover\n\ + Uses the same technique as '-r' and '-n', but creates\n\ + all the keys in parallel, in different threads.\n\ + -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\ + handle a couple of cases where '-r' reports that it\n\ + can't fix the data file.\n\ + --character-sets-dir=...\n\ + Directory where character sets are.\n\ + --set-collation=name\n\ + Change the collation used by the index.\n\ + -q, --quick Faster repair by not modifying the data file.\n\ + One can give a second '-q' to force mariachk to\n\ + modify the original datafile in case of duplicate keys.\n\ + NOTE: Tables where the data file is currupted can't be\n\ + fixed with this option.\n\ + -u, --unpack Unpack file packed with mariapack.\n\ +"); + + puts("Other actions:\n\ + -a, --analyze Analyze distribution of keys. Will make some joins in\n\ + MySQL faster. You can check the calculated distribution\n\ + by using '--description --verbose table_name'.\n\ + --stats_method=name Specifies how index statistics collection code should\n\ + threat NULLs. Possible values of name are \"nulls_unequal\"\n\ + (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ + \"nulls_ignored\".\n\ + -d, --description Prints some information about table.\n\ + -A, --set-auto-increment[=value]\n\ + Force auto_increment to start at this or higher value\n\ + If no value is given, then sets the next auto_increment\n\ + value to the highest used value for the auto key + 1.\n\ + -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\ + applications.\n\ + -R, --sort-records=#\n\ + Sort records according to an index. This makes your\n\ + data much more localized and may speed up things\n\ + (It may be VERY slow to do a sort the first time!).\n\ + -b, --block-search=#\n\ + Find a record, a block at given offset belongs to."); + + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include + +const char *maria_stats_method_names[] = {"nulls_unequal", "nulls_equal", + "nulls_ignored", NullS}; +TYPELIB maria_stats_method_typelib= { + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL}; + + /* Read options */ + +static my_bool +get_one_option(int optid, + const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'a': + if (argument == disabled_my_option) + check_param.testflag&= ~T_STATISTICS; + else + check_param.testflag|= T_STATISTICS; + break; + case 'A': + if (argument) + check_param.auto_increment_value= strtoull(argument, NULL, 0); + else + check_param.auto_increment_value= 0; /* Set to max used value */ + check_param.testflag|= T_AUTO_INC; + break; + case 'b': + check_param.search_after_block= strtoul(argument, NULL, 10); + break; + case 'B': + if (argument == disabled_my_option) + check_param.testflag&= ~T_BACKUP_DATA; + else + check_param.testflag|= T_BACKUP_DATA; + break; + case 'c': + if (argument == disabled_my_option) + check_param.testflag&= ~T_CHECK; + else + check_param.testflag|= T_CHECK; + break; + case 'C': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_CHECK | T_CHECK_ONLY_CHANGED); + else + check_param.testflag|= T_CHECK | T_CHECK_ONLY_CHANGED; + break; + case 'D': + check_param.max_data_file_length=strtoll(argument, NULL, 10); + break; + case 's': /* silent */ + if (argument == disabled_my_option) + check_param.testflag&= ~(T_SILENT | T_VERY_SILENT); + else + { + if (check_param.testflag & T_SILENT) + check_param.testflag|= T_VERY_SILENT; + check_param.testflag|= T_SILENT; + check_param.testflag&= ~T_WRITE_LOOP; + } + break; + case 'w': + if (argument == disabled_my_option) + check_param.testflag&= ~T_WAIT_FOREVER; + else + check_param.testflag|= T_WAIT_FOREVER; + break; + case 'd': /* description if isam-file */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_DESCRIPT; + else + check_param.testflag|= T_DESCRIPT; + break; + case 'e': /* extend check */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_EXTEND; + else + check_param.testflag|= T_EXTEND; + break; + case 'i': + if (argument == disabled_my_option) + check_param.testflag&= ~T_INFO; + else + check_param.testflag|= T_INFO; + break; + case 'f': + if (argument == disabled_my_option) + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC | O_EXCL; + check_param.testflag&= ~(T_FORCE_CREATE | T_UPDATE_STATE); + } + else + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC; + check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE; + } + break; + case 'F': + if (argument == disabled_my_option) + check_param.testflag&= ~T_FAST; + else + check_param.testflag|= T_FAST; + break; + case 'k': + check_param.keys_in_use= (ulonglong) strtoll(argument, NULL, 10); + break; + case 'm': + if (argument == disabled_my_option) + check_param.testflag&= ~T_MEDIUM; + else + check_param.testflag|= T_MEDIUM; /* Medium check */ + break; + case 'r': /* Repair table */ + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_BY_SORT; + break; + case 'p': + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_PARALLEL; + break; + case 'o': + check_param.testflag&= ~T_REP_ANY; + check_param.force_sort= 0; + if (argument != disabled_my_option) + { + check_param.testflag|= T_REP; + my_disable_async_io= 1; /* More safety */ + } + break; + case 'n': + check_param.testflag&= ~T_REP_ANY; + if (argument == disabled_my_option) + check_param.force_sort= 0; + else + { + check_param.testflag|= T_REP_BY_SORT; + check_param.force_sort= 1; + } + break; + case 'q': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_QUICK | T_FORCE_UNIQUENESS); + else + check_param.testflag|= + (check_param.testflag & T_QUICK) ? T_FORCE_UNIQUENESS : T_QUICK; + break; + case 'u': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_UNPACK | T_REP_BY_SORT); + else + check_param.testflag|= T_UNPACK | T_REP_BY_SORT; + break; + case 'v': /* Verbose */ + if (argument == disabled_my_option) + { + check_param.testflag&= ~T_VERBOSE; + check_param.verbose=0; + } + else + { + check_param.testflag|= T_VERBOSE; + check_param.verbose++; + } + break; + case 'R': /* Sort records */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_RECORDS; + else + { + check_param.testflag|= T_SORT_RECORDS; + check_param.opt_sort_key= (uint) atoi(argument) - 1; + if (check_param.opt_sort_key >= MARIA_MAX_KEY) + { + fprintf(stderr, + "The value of the sort key is bigger than max key: %d.\n", + MARIA_MAX_KEY); + exit(1); + } + } + break; + case 'S': /* Sort index */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_INDEX; + else + check_param.testflag|= T_SORT_INDEX; + break; + case 'T': + if (argument == disabled_my_option) + check_param.testflag&= ~T_READONLY; + else + check_param.testflag|= T_READONLY; + break; + case 'U': + if (argument == disabled_my_option) + check_param.testflag&= ~T_UPDATE_STATE; + else + check_param.testflag|= T_UPDATE_STATE; + break; + case '#': + if (argument == disabled_my_option) + { + DBUG_POP(); + } + else + { + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/mariachk.trace"); + } + break; + case 'V': + print_version(); + exit(0); + case OPT_CORRECT_CHECKSUM: + if (argument == disabled_my_option) + check_param.testflag&= ~T_CALC_CHECKSUM; + else + check_param.testflag|= T_CALC_CHECKSUM; + break; + case OPT_STATS_METHOD: + { + int method; + enum_handler_stats_method method_conv; + maria_stats_method_str= argument; + if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0) + { + fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); + exit(1); + } + switch (method-1) { + case 0: + method_conv= MI_STATS_METHOD_NULLS_EQUAL; + break; + case 1: + method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; + break; + case 2: + method_conv= MI_STATS_METHOD_IGNORE_NULLS; + break; + } + check_param.stats_method= method_conv; + break; + } +#ifdef DEBUG /* Only useful if debugging */ + case OPT_START_CHECK_POS: + check_param.start_check_pos= strtoull(argument, NULL, 0); + break; +#endif + case 'H': + my_print_help(my_long_options); + exit(0); + case '?': + usage(); + exit(0); + } + return 0; +} + + +static void get_options(register int *argc,register char ***argv) +{ + int ho_error; + + load_defaults("my", load_default_groups, argc, argv); + default_argv= *argv; + if (isatty(fileno(stdout))) + check_param.testflag|=T_WRITE_LOOP; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + /* If using repair, then update checksum if one uses --update-state */ + if ((check_param.testflag & T_UPDATE_STATE) && + (check_param.testflag & T_REP_ANY)) + check_param.testflag|= T_CALC_CHECKSUM; + + if (*argc == 0) + { + usage(); + exit(-1); + } + + if ((check_param.testflag & T_UNPACK) && + (check_param.testflag & (T_QUICK | T_SORT_RECORDS))) + { + VOID(fprintf(stderr, + "%s: --unpack can't be used with --quick or --sort-records\n", + my_progname_short)); + exit(1); + } + if ((check_param.testflag & T_READONLY) && + (check_param.testflag & + (T_REP_ANY | T_STATISTICS | T_AUTO_INC | + T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) + { + VOID(fprintf(stderr, + "%s: Can't use --readonly when repairing or sorting\n", + my_progname_short)); + exit(1); + } + + if (init_tmpdir(&mariachk_tmpdir, opt_tmpdir)) + exit(1); + + check_param.tmpdir=&mariachk_tmpdir; + check_param.key_cache_block_size= opt_key_cache_block_size; + + if (set_collation_name) + if (!(set_collation= get_charset_by_name(set_collation_name, + MYF(MY_WME)))) + exit(1); + + maria_block_size=(uint) 1 << my_bit_log2(opt_maria_block_size); + return; +} /* get options */ + + + /* Check table */ + +static int mariachk(HA_CHECK *param, my_string filename) +{ + int error,lock_type,recreate; + int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); + uint raid_chunks; + MARIA_HA *info; + File datafile; + char llbuff[22],llbuff2[22]; + my_bool state_updated=0; + MARIA_SHARE *share; + DBUG_ENTER("mariachk"); + + param->out_flag=error=param->warning_printed=param->error_printed= + recreate=0; + datafile=0; + param->isam_file_name=filename; /* For error messages */ + if (!(info=maria_open(filename, + (param->testflag & (T_DESCRIPT | T_READONLY)) ? + O_RDONLY : O_RDWR, + HA_OPEN_FOR_REPAIR | + ((param->testflag & T_WAIT_FOREVER) ? + HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? + HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED)))) + { + /* Avoid twice printing of isam file name */ + param->error_printed=1; + switch (my_errno) { + case HA_ERR_CRASHED: + _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename); + break; + case HA_ERR_NOT_A_TABLE: + _ma_check_print_error(param,"'%s' is not a MARIA-table",filename); + break; + case HA_ERR_CRASHED_ON_USAGE: + _ma_check_print_error(param,"'%s' is marked as crashed",filename); + break; + case HA_ERR_CRASHED_ON_REPAIR: + _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename); + break; + case HA_ERR_OLD_FILE: + _ma_check_print_error(param,"'%s' is a old type of MARIA-table", filename); + break; + case HA_ERR_END_OF_FILE: + _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename); + break; + case EAGAIN: + _ma_check_print_error(param,"'%s' is locked. Use -w to wait until unlocked",filename); + break; + case ENOENT: + _ma_check_print_error(param,"File '%s' doesn't exist",filename); + break; + case EACCES: + _ma_check_print_error(param,"You don't have permission to use '%s'",filename); + break; + default: + _ma_check_print_error(param,"%d when opening MARIA-table '%s'", + my_errno,filename); + break; + } + DBUG_RETURN(1); + } + share=info->s; + share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ + share->tot_locks-= share->r_locks; + share->r_locks=0; + raid_chunks=share->base.raid_chunks; + + /* + Skip the checking of the file if: + We are using --fast and the table is closed properly + We are using --check-only-changed-tables and the table hasn't changed + */ + if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED)) + { + my_bool need_to_check= maria_is_crashed(info) || share->state.open_count != 0; + + if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && + ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR) || + !(param->testflag & T_CHECK_ONLY_CHANGED)))) + need_to_check=1; + + if (info->s->base.keys && info->state->records) + { + if ((param->testflag & T_STATISTICS) && + (share->state.changed & STATE_NOT_ANALYZED)) + need_to_check=1; + if ((param->testflag & T_SORT_INDEX) && + (share->state.changed & STATE_NOT_SORTED_PAGES)) + need_to_check=1; + if ((param->testflag & T_REP_BY_SORT) && + (share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) + need_to_check=1; + } + if ((param->testflag & T_CHECK_ONLY_CHANGED) && + (share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR))) + need_to_check=1; + if (!need_to_check) + { + if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) + printf("MARIA file: %s is already checked\n",filename); + if (maria_close(info)) + { + _ma_check_print_error(param,"%d when closing MARIA-table '%s'", + my_errno,filename); + DBUG_RETURN(1); + } + DBUG_RETURN(0); + } + } + if ((param->testflag & (T_REP_ANY | T_STATISTICS | + T_SORT_RECORDS | T_SORT_INDEX)) && + (((param->testflag & T_UNPACK) && + share->data_file_type == COMPRESSED_RECORD) || + mi_uint2korr(share->state.header.state_info_length) != + MARIA_STATE_INFO_SIZE || + mi_uint2korr(share->state.header.base_info_length) != + MARIA_BASE_INFO_SIZE || + maria_is_any_intersect_keys_active(param->keys_in_use, share->base.keys, + ~share->state.key_map) || + maria_test_if_almost_full(info) || + info->s->state.header.file_version[3] != maria_file_magic[3] || + (set_collation && + set_collation->number != share->state.header.language) || + maria_block_size != MARIA_KEY_BLOCK_LENGTH)) + { + if (set_collation) + param->language= set_collation->number; + if (maria_recreate_table(param, &info,filename)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + return(-1); + } + recreate=1; + if (!(param->testflag & T_REP_ANY)) + { + param->testflag|=T_REP_BY_SORT; /* if only STATISTICS */ + if (!(param->testflag & T_SILENT)) + printf("- '%s' has old table-format. Recreating index\n",filename); + rep_quick|=T_QUICK; + } + share=info->s; + share->tot_locks-= share->r_locks; + share->r_locks=0; + } + + if (param->testflag & T_DESCRIPT) + { + param->total_files++; + param->total_records+=info->state->records; + param->total_deleted+=info->state->del; + descript(param, info, filename); + } + else + { + if (!stopwords_inited++) + ft_init_stopwords(); + + if (!(param->testflag & T_READONLY)) + lock_type = F_WRLCK; /* table is changed */ + else + lock_type= F_RDLCK; + if (info->lock_type == F_RDLCK) + info->lock_type=F_UNLCK; /* Read only table */ + if (_ma_readinfo(info,lock_type,0)) + { + _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d", + filename,my_errno); + param->error_printed=0; + goto end2; + } + /* + _ma_readinfo() has locked the table. + We mark the table as locked (without doing file locks) to be able to + use functions that only works on locked tables (like row caching). + */ + maria_lock_database(info, F_EXTRA_LCK); + datafile=info->dfile; + + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + if (param->testflag & T_REP_ANY) + { + ulonglong tmp=share->state.key_map; + maria_copy_keys_active(share->state.key_map, share->base.keys, + param->keys_in_use); + if (tmp != share->state.key_map) + info->update|=HA_STATE_CHANGED; + } + if (rep_quick && maria_chk_del(param, info, param->testflag & ~T_VERBOSE)) + { + if (param->testflag & T_FORCE_CREATE) + { + rep_quick=0; + _ma_check_print_info(param,"Creating new data file\n"); + } + else + { + error=1; + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch 'q'"); + } + } + if (!error) + { + if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && + (maria_is_any_key_active(share->state.key_map) || + (rep_quick && !param->keys_in_use && !recreate)) && + maria_test_if_sort_rep(info, info->state->records, + info->s->state.key_map, + param->force_sort)) + { + if (param->testflag & T_REP_BY_SORT) + error=maria_repair_by_sort(param,info,filename,rep_quick); + else + error=maria_repair_parallel(param,info,filename,rep_quick); + state_updated=1; + } + else if (param->testflag & T_REP_ANY) + error=maria_repair(param, info,filename,rep_quick); + } + if (!error && param->testflag & T_SORT_RECORDS) + { + /* + The data file is nowadays reopened in the repair code so we should + soon remove the following reopen-code + */ +#ifndef TO_BE_REMOVED + if (param->out_flag & O_NEW_DATA) + { /* Change temp file to org file */ + VOID(my_close(info->dfile,MYF(MY_WME))); /* Close new file */ + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + raid_chunks, + MYF(0)); + if (_ma_open_datafile(info,info->s, -1)) + error=1; + param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */ + param->read_cache.file=info->dfile; + } +#endif + if (! error) + { + uint key; + /* + We can't update the index in maria_sort_records if we have a + prefix compressed or fulltext index + */ + my_bool update_index=1; + for (key=0 ; key < share->base.keys; key++) + if (share->keyinfo[key].flag & (HA_BINARY_PACK_KEY|HA_FULLTEXT)) + update_index=0; + + error=maria_sort_records(param,info,filename,param->opt_sort_key, + /* what is the following parameter for ? */ + (my_bool) !(param->testflag & T_REP), + update_index); + datafile=info->dfile; /* This is now locked */ + if (!error && !update_index) + { + if (param->verbose) + puts("Table had a compressed index; We must now recreate the index"); + error=maria_repair_by_sort(param,info,filename,1); + } + } + } + if (!error && param->testflag & T_SORT_INDEX) + error=maria_sort_index(param,info,filename); + if (!error) + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + else + maria_mark_crashed(info); + } + else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC)) + { + if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) + printf("Checking MARIA file: %s\n",filename); + if (!(param->testflag & T_SILENT)) + printf("Data records: %7s Deleted blocks: %7s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + error =maria_chk_status(param,info); + maria_intersect_keys_active(share->state.key_map, param->keys_in_use); + error =maria_chk_size(param,info); + if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) + error|=maria_chk_del(param, info,param->testflag); + if ((!error || (!(param->testflag & (T_FAST | T_FORCE_CREATE)) && + !param->start_check_pos))) + { + error|=maria_chk_key(param, info); + if (!error && (param->testflag & (T_STATISTICS | T_AUTO_INC))) + error=maria_update_state_info(param, info, + ((param->testflag & T_STATISTICS) ? + UPDATE_STAT : 0) | + ((param->testflag & T_AUTO_INC) ? + UPDATE_AUTO_INC : 0)); + } + if ((!rep_quick && !error) || + !(param->testflag & (T_FAST | T_FORCE_CREATE))) + { + if (param->testflag & (T_EXTEND | T_MEDIUM)) + VOID(init_key_cache(maria_key_cache,opt_key_cache_block_size, + param->use_buffers, 0, 0)); + VOID(init_io_cache(¶m->read_cache,datafile, + (uint) param->read_buffer_length, + READ_CACHE, + (param->start_check_pos ? + param->start_check_pos : + share->pack.header_length), + 1, + MYF(MY_WME))); + maria_lock_memory(param); + if ((info->s->options & (HA_OPTION_PACK_RECORD | + HA_OPTION_COMPRESS_RECORD)) || + (param->testflag & (T_EXTEND | T_MEDIUM))) + error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND); + error|=_ma_flush_blocks(param, share->key_cache, share->kfile); + VOID(end_io_cache(¶m->read_cache)); + } + if (!error) + { + if ((share->state.changed & STATE_CHANGED) && + (param->testflag & T_UPDATE_STATE)) + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + } + else if (!maria_is_crashed(info) && + (param->testflag & T_UPDATE_STATE)) + { /* Mark crashed */ + maria_mark_crashed(info); + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + } + } + if ((param->testflag & T_AUTO_INC) || + ((param->testflag & T_REP_ANY) && info->s->base.auto_key)) + _ma_update_auto_increment_key(param, info, + (my_bool) !test(param->testflag & T_AUTO_INC)); + + if (!(param->testflag & T_DESCRIPT)) + { + if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY)) + error|=maria_update_state_info(param, info, + UPDATE_OPEN_COUNT | + (((param->testflag & T_REP_ANY) ? + UPDATE_TIME : 0) | + (state_updated ? UPDATE_STAT : 0) | + ((param->testflag & T_SORT_RECORDS) ? + UPDATE_SORT : 0))); + VOID(maria_lock_file(param, share->kfile,0L,F_UNLCK,"indexfile",filename)); + info->update&= ~HA_STATE_CHANGED; + } + maria_lock_database(info, F_UNLCK); +end2: + if (maria_close(info)) + { + _ma_check_print_error(param,"%d when closing MARIA-table '%s'",my_errno,filename); + DBUG_RETURN(1); + } + if (error == 0) + { + if (param->out_flag & O_NEW_DATA) + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + raid_chunks, + ((param->testflag & T_BACKUP_DATA) ? + MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); + if (param->out_flag & O_NEW_INDEX) + error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT,0, + MYF(0)); + } + VOID(fflush(stdout)); VOID(fflush(stderr)); + if (param->error_printed) + { + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + if (param->testflag & T_REP_ANY) + VOID(fprintf(stderr, + "Try fixing it by using the --safe-recover (-o), the --force (-f) option or by not using the --quick (-q) flag\n")); + } + else if (!(param->error_printed & 2) && + !(param->testflag & T_FORCE_CREATE)) + VOID(fprintf(stderr, + "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", + filename)); + } + else if (param->warning_printed && + ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | + T_FORCE_CREATE))) + VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n", + filename)); + VOID(fflush(stderr)); + DBUG_RETURN(error); +} /* mariachk */ + + + /* Write info about table */ + +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + uint key,keyseg_nr,field,start; + reg3 MARIA_KEYDEF *keyinfo; + reg2 HA_KEYSEG *keyseg; + reg4 const char *text; + char buff[160],length[10],*pos,*end; + enum en_fieldtype type; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("describe"); + + printf("\nMARIA file: %s\n",name); + fputs("Record format: ",stdout); + if (share->options & HA_OPTION_COMPRESS_RECORD) + puts("Compressed"); + else if (share->options & HA_OPTION_PACK_RECORD) + puts("Packed"); + else + puts("Fixed length"); + printf("Character set: %s (%d)\n", + get_charset_name(share->state.header.language), + share->state.header.language); + + if (param->testflag & T_VERBOSE) + { + printf("File-version: %d\n", + (int) share->state.header.file_version[3]); + if (share->state.create_time) + { + get_date(buff,1,share->state.create_time); + printf("Creation time: %s\n",buff); + } + if (share->state.check_time) + { + get_date(buff,1,share->state.check_time); + printf("Recover time: %s\n",buff); + } + pos=buff; + if (share->state.changed & STATE_CRASHED) + strmov(buff,"crashed"); + else + { + if (share->state.open_count) + pos=strmov(pos,"open,"); + if (share->state.changed & STATE_CHANGED) + pos=strmov(pos,"changed,"); + else + pos=strmov(pos,"checked,"); + if (!(share->state.changed & STATE_NOT_ANALYZED)) + pos=strmov(pos,"analyzed,"); + if (!(share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) + pos=strmov(pos,"optimized keys,"); + if (!(share->state.changed & STATE_NOT_SORTED_PAGES)) + pos=strmov(pos,"sorted index pages,"); + pos[-1]=0; /* Remove extra ',' */ + } + printf("Status: %s\n",buff); + if (share->base.auto_key) + { + printf("Auto increment key: %13d Last value: %13s\n", + share->base.auto_key, + llstr(share->state.auto_increment,llbuff)); + } + if (share->base.raid_type) + { + printf("RAID: Type: %u Chunks: %u Chunksize: %lu\n", + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize); + } + if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + printf("Checksum: %23s\n",llstr(info->state->checksum,llbuff)); +; + if (share->options & HA_OPTION_DELAY_KEY_WRITE) + printf("Keys are only flushed at close\n"); + + } + printf("Data records: %13s Deleted blocks: %13s\n", + llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2)); + if (param->testflag & T_SILENT) + DBUG_VOID_RETURN; /* This is enough */ + + if (param->testflag & T_VERBOSE) + { +#ifdef USE_RELOC + printf("Init-relocation: %13s\n",llstr(share->base.reloc,llbuff)); +#endif + printf("Datafile parts: %13s Deleted data: %13s\n", + llstr(share->state.split,llbuff), + llstr(info->state->empty,llbuff2)); + printf("Datafile pointer (bytes):%9d Keyfile pointer (bytes):%9d\n", + share->rec_reflength,share->base.key_reflength); + printf("Datafile length: %13s Keyfile length: %13s\n", + llstr(info->state->data_file_length,llbuff), + llstr(info->state->key_file_length,llbuff2)); + + if (info->s->base.reloc == 1L && info->s->base.records == 1L) + puts("This is a one-record table"); + else + { + if (share->base.max_data_file_length != HA_OFFSET_ERROR || + share->base.max_key_file_length != HA_OFFSET_ERROR) + printf("Max datafile length: %13s Max keyfile length: %13s\n", + llstr(share->base.max_data_file_length-1,llbuff), + llstr(share->base.max_key_file_length-1,llbuff2)); + } + } + + printf("Recordlength: %13d\n",(int) share->base.pack_reclength); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + longlong2str(share->state.key_map,buff,2); + printf("Using only keys '%s' of %d possibly keys\n", + buff, share->base.keys); + } + puts("\ntable description:"); + printf("Key Start Len Index Type"); + if (param->testflag & T_VERBOSE) + printf(" Rec/key Root Blocksize"); + VOID(putchar('\n')); + + for (key=keyseg_nr=0, keyinfo= &share->keyinfo[0] ; + key < share->base.keys; + key++,keyinfo++) + { + keyseg=keyinfo->seg; + if (keyinfo->flag & HA_NOSAME) text="unique "; + else if (keyinfo->flag & HA_FULLTEXT) text="fulltext "; + else text="multip."; + + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++ = ' '; + *pos=0; + if (keyinfo->flag & HA_PACK_KEY) + pos=strmov(pos,prefix_packed_txt); + if (keyinfo->flag & HA_BINARY_PACK_KEY) + pos=strmov(pos,bin_packed_txt); + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + + printf("%-4d%-6ld%-3d %-8s%-21s", + key+1,(long) keyseg->start+1,keyseg->length,text,buff); + if (share->state.key_root[key] != HA_OFFSET_ERROR) + llstr(share->state.key_root[key],buff); + else + buff[0]=0; + if (param->testflag & T_VERBOSE) + printf("%11lu %12s %10d", + share->state.rec_per_key_part[keyseg_nr++], + buff,keyinfo->block_length); + VOID(putchar('\n')); + while ((++keyseg)->type != HA_KEYTYPE_END) + { + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++= ' '; + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + printf(" %-6ld%-3d %-21s", + (long) keyseg->start+1,keyseg->length,buff); + if (param->testflag & T_VERBOSE) + printf("%11lu", share->state.rec_per_key_part[keyseg_nr++]); + VOID(putchar('\n')); + } + keyseg++; + } + if (share->state.header.uniques) + { + MARIA_UNIQUEDEF *uniqueinfo; + puts("\nUnique Key Start Len Nullpos Nullbit Type"); + for (key=0,uniqueinfo= &share->uniqueinfo[0] ; + key < share->state.header.uniques; key++, uniqueinfo++) + { + my_bool new_row=0; + char null_bit[8],null_pos[8]; + printf("%-8d%-5d",key+1,uniqueinfo->key+1); + for (keyseg=uniqueinfo->seg ; keyseg->type != HA_KEYTYPE_END ; keyseg++) + { + if (new_row) + fputs(" ",stdout); + null_bit[0]=null_pos[0]=0; + if (keyseg->null_bit) + { + sprintf(null_bit,"%d",keyseg->null_bit); + sprintf(null_pos,"%ld",(long) keyseg->null_pos+1); + } + printf("%-7ld%-5d%-9s%-10s%-30s\n", + (long) keyseg->start+1,keyseg->length, + null_pos,null_bit, + type_names[keyseg->type]); + new_row=1; + } + } + } + if (param->verbose > 1) + { + char null_bit[8],null_pos[8]; + printf("\nField Start Length Nullpos Nullbit Type"); + if (share->options & HA_OPTION_COMPRESS_RECORD) + printf(" Huff tree Bits"); + VOID(putchar('\n')); + start=1; + for (field=0 ; field < share->base.fields ; field++) + { + if (share->options & HA_OPTION_COMPRESS_RECORD) + type=share->rec[field].base_type; + else + type=(enum en_fieldtype) share->rec[field].type; + end=strmov(buff,field_pack[type]); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->rec[field].pack_type & PACK_TYPE_SELECTED) + end=strmov(end,", not_always"); + if (share->rec[field].pack_type & PACK_TYPE_SPACE_FIELDS) + end=strmov(end,", no empty"); + if (share->rec[field].pack_type & PACK_TYPE_ZERO_FILL) + { + sprintf(end,", zerofill(%d)",share->rec[field].space_length_bits); + end=strend(end); + } + } + if (buff[0] == ',') + strmov(buff,buff+2); + int10_to_str((long) share->rec[field].length,length,10); + null_bit[0]=null_pos[0]=0; + if (share->rec[field].null_bit) + { + sprintf(null_bit,"%d",share->rec[field].null_bit); + sprintf(null_pos,"%d",share->rec[field].null_pos+1); + } + printf("%-6d%-6d%-7s%-8s%-8s%-35s",field+1,start,length, + null_pos, null_bit, buff); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->rec[field].huff_tree) + printf("%3d %2d", + (uint) (share->rec[field].huff_tree-share->decode_trees)+1, + share->rec[field].huff_tree->quick_table_bits); + } + VOID(putchar('\n')); + start+=share->rec[field].length; + } + } + DBUG_VOID_RETURN; +} /* describe */ + + + /* Sort records according to one key */ + +static int maria_sort_records(HA_CHECK *param, + register MARIA_HA *info, my_string name, + uint sort_key, + my_bool write_info, + my_bool update_index) +{ + int got_error; + uint key; + MARIA_KEYDEF *keyinfo; + File new_file; + uchar *temp_buff; + ha_rows old_record_count; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("sort_records"); + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char*)&sort_param,sizeof(sort_param)); + sort_param.sort_info=&sort_info; + sort_info.param=param; + keyinfo= &share->keyinfo[sort_key]; + got_error=1; + temp_buff=0; + new_file= -1; + + if (! maria_is_key_active(share->state.key_map, sort_key)) + { + _ma_check_print_warning(param, + "Can't sort table '%s' on key %d; No such key", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (keyinfo->flag & HA_FULLTEXT) + { + _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (share->data_file_type == COMPRESSED_RECORD) + { + _ma_check_print_warning(param,"Can't sort read-only table '%s'", name); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (!(param->testflag & T_SILENT)) + { + printf("- Sorting records for MARIA-table '%s'\n",name); + if (write_info) + printf("Data records: %9s Deleted: %9s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + } + if (share->state.key_root[sort_key] == HA_OFFSET_ERROR) + DBUG_RETURN(0); /* Nothing to do */ + + init_key_cache(maria_key_cache, opt_key_cache_block_size, param->use_buffers, + 0, 0); + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE,share->pack.header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + + if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + goto err; + } + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + goto err; + } + fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32); + new_file=my_raid_create(fn_format(param->temp_filename, + param->temp_filename,"", + DATA_TMP_EXT,2+4), + 0,param->tmpfile_createflag, + share->base.raid_type, + share->base.raid_chunks, + share->base.raid_chunksize, + MYF(0)); + if (new_file < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (share->pack.header_length) + if (maria_filecopy(param,new_file,info->dfile,0L,share->pack.header_length, + "datafile-header")) + goto err; + info->rec_cache.file=new_file; /* Use this file for cacheing*/ + + maria_lock_memory(param); + for (key=0 ; key < share->base.keys ; key++) + share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME; + + if (my_pread(share->kfile,(byte*) temp_buff, + (uint) keyinfo->block_length, + share->state.key_root[sort_key], + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + (ulong) share->state.key_root[sort_key]); + goto err; + } + + /* Setup param for _ma_sort_write_record */ + sort_info.info=info; + sort_info.new_data_file_type=share->data_file_type; + sort_param.fix_datafile=1; + sort_param.master=1; + sort_param.filepos=share->pack.header_length; + old_record_count=info->state->records; + info->state->records=0; + if (sort_info.new_data_file_type != COMPRESSED_RECORD) + info->state->checksum=0; + + if (sort_record_index(&sort_param,info,keyinfo,share->state.key_root[sort_key], + temp_buff, sort_key,new_file,update_index) || + maria_write_data_suffix(&sort_info,1) || + flush_io_cache(&info->rec_cache)) + goto err; + + if (info->state->records != old_record_count) + { + _ma_check_print_error(param,"found %s of %s records", + llstr(info->state->records,llbuff), + llstr(old_record_count,llbuff2)); + goto err; + } + + VOID(my_close(info->dfile,MYF(MY_WME))); + param->out_flag|=O_NEW_DATA; /* Data in new file */ + info->dfile=new_file; /* Use new datafile */ + info->state->del=0; + info->state->empty=0; + share->state.dellink= HA_OFFSET_ERROR; + info->state->data_file_length=sort_param.filepos; + share->state.split=info->state->records; /* Only hole records */ + share->state.version=(ulong) time((time_t*) 0); + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + got_error=0; + +err: + if (got_error && new_file >= 0) + { + VOID(end_io_cache(&info->rec_cache)); + (void) my_close(new_file,MYF(MY_WME)); + (void) my_raid_delete(param->temp_filename, share->base.raid_chunks, + MYF(MY_WME)); + } + if (temp_buff) + { + my_afree((gptr) temp_buff); + } + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + sort_info.buff=0; + share->state.sortkey=sort_key; + DBUG_RETURN(_ma_flush_blocks(param, share->key_cache, share->kfile) | + got_error); +} /* sort_records */ + + + /* Sort records recursive using one index */ + +static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, uchar *buff, uint sort_key, + File new_file,my_bool update_index) +{ + uint nod_flag,used_length,key_length; + uchar *temp_buff,*keypos,*endpos; + my_off_t next_page,rec_pos; + uchar lastkey[HA_MAX_KEY_BUFF]; + char llbuff[22]; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_record_index"); + + nod_flag=_ma_test_if_nod(buff); + temp_buff=0; + + if (nod_flag) + { + if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not Enough memory"); + DBUG_RETURN(-1); + } + } + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + _sanity(__FILE__,__LINE__); + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + if (my_pread(info->s->kfile,(byte*) temp_buff, + (uint) keyinfo->block_length, next_page, + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read keys from filepos: %s", + llstr(next_page,llbuff)); + goto err; + } + if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff,sort_key, + new_file, update_index)) + goto err; + } + _sanity(__FILE__,__LINE__); + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + == 0) + break; + rec_pos= _ma_dpos(info,0,lastkey+key_length); + + if ((*info->s->read_rnd)(info,sort_param->record,rec_pos,0)) + { + _ma_check_print_error(param,"%d when reading datafile",my_errno); + goto err; + } + if (rec_pos != sort_param->filepos && update_index) + { + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + sort_param->filepos); + if (maria_movepoint(info,sort_param->record,rec_pos,sort_param->filepos, + sort_key)) + { + _ma_check_print_error(param,"%d when updating key-pointers",my_errno); + goto err; + } + } + if (_ma_sort_write_record(sort_param)) + goto err; + } + /* Clear end of block to get better compression if the table is backuped */ + bzero((byte*) buff+used_length,keyinfo->block_length-used_length); + if (my_pwrite(info->s->kfile,(byte*) buff,(uint) keyinfo->block_length, + page,param->myf_rw)) + { + _ma_check_print_error(param,"%d when updating keyblock",my_errno); + goto err; + } + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(0); +err: + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(1); +} /* sort_record_index */ + + + +/* + Check if mariachk was killed by a signal + This is overloaded by other programs that want to be able to abort + sorting +*/ + +static int not_killed= 0; + +volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused))) +{ + return ¬_killed; /* always NULL */ +} + + /* print warnings and errors */ + /* VARARGS */ + +void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), + const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + VOID(vfprintf(stdout, fmt, args)); + VOID(fputc('\n',stdout)); + va_end(args); +} + +/* VARARGS */ + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, + param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->warning_printed=1; + va_start(args,fmt); + fprintf(stderr,"%s: warning: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} + +/* VARARGS */ + +void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_error"); + DBUG_PRINT("enter",("format: %s",fmt)); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->error_printed|=1; + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h new file mode 100644 index 00000000000..e3f0219a663 --- /dev/null +++ b/storage/maria/maria_def.h @@ -0,0 +1,751 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file is included by all internal maria files */ + +#include "maria.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ +#include +#ifdef THREAD +#include +#include +#else +#include +#endif + +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write + +typedef struct st_maria_status_info +{ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ + my_off_t key_file_length; + my_off_t data_file_length; + ha_checksum checksum; +} MARIA_STATUS_INFO; + +typedef struct st_maria_state_info +{ + struct + { /* Fileheader */ + uchar file_version[4]; + uchar options[2]; + uchar header_length[2]; + uchar state_info_length[2]; + uchar base_info_length[2]; + uchar base_pos[2]; + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar max_block_size; /* max keyblock size */ + uchar fulltext_keys; + uchar not_used; /* To align to 8 */ + } header; + + MARIA_STATUS_INFO state; + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ + ulonglong auto_increment; + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ + ulong status; + ulong *rec_per_key_part; + my_off_t *key_root; /* Start of key trees */ + my_off_t *key_del; /* delete links for trees */ + my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ + + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ + uint open_count; + uint8 changed; /* Changed since mariachk */ + + /* the following isn't saved on disk */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ + ulong *key_info; +} MARIA_STATE_INFO; + + +#define MARIA_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) +#define MARIA_STATE_KEY_SIZE 8 +#define MARIA_STATE_KEYBLOCK_SIZE 8 +#define MARIA_STATE_KEYSEG_SIZE 4 +#define MARIA_STATE_EXTRA_SIZE ((MARIA_MAX_KEY+MARIA_MAX_KEY_BLOCK_SIZE)*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE) +#define MARIA_KEYDEF_SIZE (2+ 5*2) +#define MARIA_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MARIA_COLUMNDEF_SIZE (2*3+1) +#define MARIA_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) +#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ + +typedef struct st__ma_base_info +{ + my_off_t keystart; /* Start of keys */ + my_off_t max_data_file_length; + my_off_t max_key_file_length; + my_off_t margin_key_file_length; + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec */ + ulong min_pack_length; + ulong max_pack_length; /* Max possibly length of + packed rec. */ + ulong min_block_length; + ulong fields, /* fields in table */ + pack_fields; /* packed fields in table */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + uint pack_bits; /* Length of packed bits */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ + /* Extra allocation when using dynamic record format */ + uint extra_alloc_bytes; + uint extra_alloc_procent; + /* Info about raid */ + uint raid_type, raid_chunks; + ulong raid_chunksize; + /* The following are from the header */ + uint key_parts, all_key_parts; +} MARIA_BASE_INFO; + + + /* Structs used intern in database */ + +typedef struct st_maria_blob /* Info of record */ +{ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ +} MARIA_BLOB; + + +typedef struct st_maria_pack +{ + ulong header_length; + uint ref_length; + uchar version; +} MARIA_PACK; + +#define MAX_NONMAPPED_INSERTS 1000 + +typedef struct st_maria_share +{ /* Shared between opens */ + MARIA_STATE_INFO state; + MARIA_BASE_INFO base; + MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key + definition */ + MARIA_KEYDEF *keyinfo; /* Key definitions */ + MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MARIA_COLUMNDEF *rec; /* Pointer to field information + */ + MARIA_PACK pack; /* Data about packed records */ + MARIA_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name, /* Resolved path names from + symlinks */ + *index_file_name; + byte *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache + */ + MARIA_DECODE_TREE *decode_trees; + uint16 *decode_tables; + int(*read_record) (struct st_maria_info *, my_off_t, byte *); + int(*write_record) (struct st_maria_info *, const byte *); + int(*update_record) (struct st_maria_info *, my_off_t, const byte *); + int(*delete_record) (struct st_maria_info *); + int(*read_rnd) (struct st_maria_info *, byte *, my_off_t, my_bool); + int(*compare_record) (struct st_maria_info *, const byte *); + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); + int(*compare_unique) (struct st_maria_info *, MARIA_UNIQUEDEF *, + const byte *record, my_off_t pos); + uint(*file_read) (MARIA_HA *, byte *, uint, my_off_t, myf); + uint(*file_write) (MARIA_HA *, byte *, uint, my_off_t, myf); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* Theese are used by packed + data */ + ulong max_pack_length; + ulong state_diff_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; + uint32 ftparsers; /* Number of distinct ftparsers + + 1 */ + File kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint blocksize; /* blocksize of keyfile */ + myf write_flag; + enum data_file_type data_file_type; + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, temporary, delay_key_write, concurrent_insert; +#ifdef THREAD + THR_LOCK lock; + pthread_mutex_t intern_lock; /* Locking for use with + _locking */ + rw_lock_t *key_root_lock; +#endif + my_off_t mmaped_length; + uint nonmmaped_inserts; /* counter of writing in + non-mmaped area */ + rw_lock_t mmap_lock; +} MARIA_SHARE; + + +typedef uint maria_bit_type; + +typedef struct st_maria_bit_buff +{ /* Used for packing of record */ + maria_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MARIA_BIT_BUFF; + +struct st_maria_info +{ + MARIA_SHARE *s; /* Shared between open:s */ + MARIA_STATUS_INFO *state, save_state; + MARIA_BLOB *blobs; /* Pointer to blobs */ + MARIA_BIT_BUFF bit_buff; + /* accumulate indexfile changes between write's */ + TREE *bulk_insert; + DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + char *filename; /* parameter to open filename */ + uchar *buff, /* Temp area for key */ + *lastkey, *lastkey2; /* Last used search key */ + uchar *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Tempbuff for recordpack */ + uchar *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int(*read_record) (struct st_maria_info *, my_off_t, byte *); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + my_off_t lastpos, /* Last record position */ + nextpos; /* Position to next record */ + my_off_t save_lastpos; + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ + my_off_t dupp_key_pos; + ha_checksum checksum; + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + int dfile; /* The datafile */ + uint opt_flag; /* Optim. for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in maria_rkey() */ + enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ + uint save_lastkey_length; + uint pack_key_length; /* For MARIAMRG */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ + my_bool quick_mode; + /* If info->buff can't be used for rnext */ + my_bool page_changed; + /* If info->buff has to be reread for rnext */ + my_bool buff_used; + my_bool once_flags; /* For MARIAMRG */ +#ifdef THREAD + THR_LOCK_DATA lock; +#endif + uchar *maria_rtree_recursion_state; /* For RTREE */ + int maria_rtree_recursion_depth; +}; + +/* Some defines used by isam-funktions */ + +#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _search() */ +#define F_EXTRA_LCK -1 + +/* bits in opt_flag */ +#define MEMMAP_USED 32 +#define REMEMBER_OLD_POS 64 + +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 + +/* once_flags */ +#define USE_PACKED_KEYS 1 +#define RRND_PRESERVE_LASTINX 2 + +/* bits in state.changed */ + +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 +#define STATE_CRASHED_ON_REPAIR 4 +#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_OPTIMIZED_KEYS 16 +#define STATE_NOT_SORTED_PAGES 32 + +/* options to maria_read_cache */ + +#define READING_NEXT 1 +#define READING_HEADER 2 + +#define maria_getint(x) ((uint) mi_uint2korr(x) & 32767) +#define maria_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ + mi_int2store(x,boh); } +#define _ma_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) +#define maria_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED +#define maria_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } +#define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) +#define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) +#define maria_print_error(SHARE, ERRNO) \ + _ma_report_error((ERRNO), (SHARE)->index_file_name) + +/* Functions to store length of space packed keys, VARCHAR or BLOB keys */ + +#define store_key_length(key,length) \ +{ if ((length) < 255) \ + { *(key)=(length); } \ + else \ + { *(key)=255; mi_int2store((key)+1,(length)); } \ +} + +#define get_key_full_length(length,key) \ +{ if ((uchar) *(key) != 255) \ + length= ((uint) (uchar) *((key)++))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; (key)+=3; } \ +} + +#define get_key_full_length_rdonly(length,key) \ +{ if ((uchar) *(key) != 255) \ + length= ((uint) (uchar) *((key)))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; } \ +} + +#define get_pack_length(length) ((length) >= 255 ? 3 : 1) + +#define MARIA_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +/* Don't use to small record-blocks */ +#define MARIA_EXTEND_BLOCK_LENGTH 20 +#define MARIA_SPLIT_LENGTH ((MARIA_EXTEND_BLOCK_LENGTH+4)*2) + /* Max prefix of record-block */ +#define MARIA_MAX_DYN_BLOCK_HEADER 20 +#define MARIA_BLOCK_INFO_HEADER_LENGTH 20 +#define MARIA_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MARIA_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MARIA_DYN_MAX_ROW_LENGTH (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH) +#define MARIA_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MARIA_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MARIA_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))) +#define MARIA_REC_BUFF_OFFSET ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) + +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ + +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ + +#define MARIA_MAX_KEY_BLOCK_SIZE (MARIA_MAX_KEY_BLOCK_LENGTH/MARIA_MIN_KEY_BLOCK_LENGTH) +#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/maria_block_size+1)*maria_block_size) +#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ + +#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100 +#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100 +#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10 + +/* The UNIQUE check is done with a hashed long key */ + +#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define maria_unique_store(A,B) mi_int4store((A),(B)) + +#ifdef THREAD +extern pthread_mutex_t THR_LOCK_maria; +#endif +#if !defined(THREAD) || defined(DONT_USE_RW_LOCKS) +#define rw_wrlock(A) {} +#define rw_rdlock(A) {} +#define rw_unlock(A) {} +#endif + + /* Some extern variables */ + +extern LIST *maria_open_list; +extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; +extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; +extern uint maria_quick_table_bits; +extern File maria_log_file; +extern ulong maria_pid; + + /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ + +typedef struct st_maria_s_param +{ + uint ref_length, key_length, + n_ref_length, + n_length, totlength, part_of_prev_key, prev_length, pack_marker; + uchar *key, *prev_key, *next_key_pos; + bool store_not_null; +} MARIA_KEY_PARAM; + + /* Prototypes for intern functions */ + +extern int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, + byte *buf); +extern int _ma_write_dynamic_record(MARIA_HA *, const byte *); +extern int _ma_update_dynamic_record(MARIA_HA *, my_off_t, const byte *); +extern int _ma_delete_dynamic_record(MARIA_HA *info); +extern int _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record); +extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, my_off_t, + my_bool); +extern int _ma_write_blob_record(MARIA_HA *, const byte *); +extern int _ma_update_blob_record(MARIA_HA *, my_off_t, const byte *); +extern int _ma_read_static_record(MARIA_HA *info, my_off_t filepos, + byte *buf); +extern int _ma_write_static_record(MARIA_HA *, const byte *); +extern int _ma_update_static_record(MARIA_HA *, my_off_t, const byte *); +extern int _ma_delete_static_record(MARIA_HA *info); +extern int _ma_cmp_static_record(MARIA_HA *info, const byte *record); +extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, my_off_t, my_bool); +extern int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, + uint length); +extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, + my_off_t *root, uint comp_flag); +extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, my_off_t *root); +extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uchar *anc_buff, uchar *key_pos, uchar *key_buff, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uchar *buff, uchar *key_buff, + my_bool insert_last); +extern uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, + uchar *page, uchar *key, + uint *return_key_length, + uchar ** after_key); +extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, uchar *key_pos, + uchar *org_key, uchar *prev_key, + uchar *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, uchar *key_pos, + uchar *org_key, uchar *prev_key, + uchar *key, + MARIA_KEY_PARAM *s_temp); +void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, + MARIA_KEY_PARAM *s_temp); +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, + MARIA_KEY_PARAM *s_temp); +#ifdef NOT_USED +void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, + MARIA_KEY_PARAM *s_temp); +#endif +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, + MARIA_KEY_PARAM *s_temp); + +extern int _ma_ck_delete(MARIA_HA *info, uint keynr, uchar *key, + uint key_length); +extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer); +extern int _ma_writeinfo(MARIA_HA *info, uint options); +extern int _ma_test_if_changed(MARIA_HA *info); +extern int _ma_mark_file_changed(MARIA_HA *info); +extern int _ma_decrement_open_count(MARIA_HA *info); +extern int _ma_check_index(MARIA_HA *info, int inx); +extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _ma_bin_search(struct st_maria_info *info, MARIA_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar **ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern my_off_t _ma_kpos(uint nod_flag, uchar *after_key); +extern void _ma_kpointer(MARIA_HA *info, uchar *buff, my_off_t pos); +extern my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, uchar *after_key); +extern my_off_t _ma_rec_pos(MARIA_SHARE *info, uchar *ptr); +extern void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos); +extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar **page, uchar *key); +extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar **page, uchar *key); +extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + uchar ** page_pos, uchar *key); +extern uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *keypos, uchar *lastkey, + uchar *endpos, uint *return_key_length); +extern uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *page, uchar *key, uchar *keypos, + uint *return_key_length); +extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, uchar *key); +extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register uchar *key, + HA_KEYSEG *end); +extern uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, uchar *from); +extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, uint key_length, uint nextflag, + my_off_t pos); +extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern uchar *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff, + int return_buffer); +extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff); +extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level); +extern uint _ma_make_key(MARIA_HA *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos); +extern uint _ma_pack_key(MARIA_HA *info, uint keynr, uchar *key, + uchar *old, uint key_length, + HA_KEYSEG ** last_used_keyseg); +extern int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, + byte *buf); +extern int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, + uint length, int re_read_if_possibly); +extern void _ma_update_auto_increment(MARIA_HA *info, const byte *record); + +extern byte *_ma_alloc_rec_buff(MARIA_HA *, ulong, byte **); +#define _ma_get_rec_buff_ptr(info,buf) \ + ((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \ + (buf) - MARIA_REC_BUFF_OFFSET : (buf)) +#define _ma_get_rec_buff_len(info,buf) \ + (*((uint32 *)(_ma_get_rec_buff_ptr(info,buf)))) + +extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, + ulong reclength); +extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, + byte *packpos, ulong packed_length, + my_bool with_checkum); +extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, + ulong length, my_off_t next_filepos, + byte ** record, ulong *reclength, + int *flag); +extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg, + const uchar *key, uint length); +extern my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys); +extern int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, + byte *buf); +extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, my_off_t, my_bool); +extern int _ma_pack_rec_unpack(MARIA_HA *info, byte *to, byte *from, + ulong reclength); +extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b); +extern int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, + my_off_t pos); + +/* Parameter to _ma_get_block_info */ + +typedef struct st_maria_block_info +{ + uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH]; + ulong rec_len; + ulong data_len; + ulong block_len; + ulong blob_len; + my_off_t filepos; + my_off_t next_filepos; + my_off_t prev_filepos; + uint second_read; + uint offset; +} MARIA_BLOCK_INFO; + +/* bits in return from _ma_get_block_info */ + +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ + +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" + +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 + +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) + +enum maria_log_commands +{ + MARIA_LOG_OPEN, MARIA_LOG_WRITE, MARIA_LOG_UPDATE, MARIA_LOG_DELETE, + MARIA_LOG_CLOSE, MARIA_LOG_EXTRA, MARIA_LOG_LOCK, MARIA_LOG_DELETE_ALL +}; + +#define maria_log(a,b,c,d) if (maria_log_file >= 0) _ma_log(a,b,c,d) +#define maria_log_command(a,b,c,d,e) if (maria_log_file >= 0) _ma_log_command(a,b,c,d,e) +#define maria_log_record(a,b,c,d,e) if (maria_log_file >= 0) _ma_log_record(a,b,c,d,e) + +#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) +#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1) + +extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); +extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); +extern uint _ma_pack_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, + my_off_t); +extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); +extern void _ma_log(enum maria_log_commands command, MARIA_HA *info, + const byte *buffert, uint length); +extern void _ma_log_command(enum maria_log_commands command, + MARIA_HA *info, const byte *buffert, + uint length, int result); +extern void _ma_log_record(enum maria_log_commands command, MARIA_HA *info, + const byte *record, my_off_t filepos, + int result); +extern void _ma_report_error(int errcode, const char *file_name); +extern my_bool _ma_memmap_file(MARIA_HA *info); +extern void _ma_unmap_file(MARIA_HA *info); +extern uint _ma_save_pack_length(uint version, byte * block_buff, + ulong length); +extern uint _ma_calc_pack_length(uint version, ulong length); +extern ulong _ma_calc_blob_length(uint length, const byte *pos); +extern uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite); +uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state); +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, + my_bool pRead); +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base); +uchar *_ma_n_base_info_read(uchar *ptr, MARIA_BASE_INFO *base); +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg); +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg); +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef); +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef); +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef); +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *keydef); +uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo); +char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo); +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record); +ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf); +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *buf); +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + byte *record, ha_checksum unique_hash, + my_off_t pos); +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *buf); +int _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, my_off_t pos); +int _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, my_off_t pos); +int _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal); +void _ma_get_status(void *param, int concurrent_insert); +void _ma_update_status(void *param); +void _ma_copy_status(void *to, void *from); +my_bool _ma_check_status(void *param); + +extern MARIA_HA *_ma_test_if_reopen(char *filename); +my_bool _ma_check_table_is_closed(const char *name, const char *where); +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup); +int _ma_open_keyfile(MARIA_SHARE *share); +void _ma_setup_functions(register MARIA_SHARE *share); +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size); +void _ma_remap_file(MARIA_HA *info, my_off_t size); + +/* Functions needed by _ma_check (are overrided in MySQL) */ +C_MODE_START +volatile int *_ma_killed_ptr(HA_CHECK *param); +void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); +C_MODE_END + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param); +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param); +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param); +#ifdef THREAD +pthread_handler_t _ma_thr_find_all_keys(void *arg); +#endif +int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file); + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong); diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c new file mode 100644 index 00000000000..eb5dec5aa3b --- /dev/null +++ b/storage/maria/maria_ftdump.c @@ -0,0 +1,279 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include + +static void usage(); +static void complain(int val); +static my_bool get_one_option(int, const struct my_option *, char *); + +static int count=0, stats=0, dump=0, lstats=0; +static my_bool verbose; +static char *query=NULL; +static uint lengths[256]; + +#define MAX_LEN (HA_FT_MAXBYTELEN+10) +#define HOW_OFTEN_TO_WRITE 10000 + +static struct my_option my_long_options[] = +{ + {"dump", 'd', "Dump index (incl. data offsets and word weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"stats", 's', "Report global stats.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be verbose.", + (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Calculate per-word stats (counts and global weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"length", 'l', "Report length distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", 'h', "Display help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', "Synonym for -h.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +int main(int argc,char *argv[]) +{ + int error=0, subkeys; + uint keylen, keylen2=0, inx, doc_cnt=0; + float weight= 1.0; + double gws, min_gws=0, avg_gws=0; + MARIA_HA *info; + char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; + ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; + struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ + + MY_INIT(argv[0]); + if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(error); + maria_init(); + if (count || dump) + verbose=0; + if (!count && !dump && !lstats && !query) + stats=1; + + if (verbose) + setbuf(stdout,NULL); + + if (argc < 2) + usage(); + + { + char *end; + inx= (uint) strtoll(argv[1], &end, 10); + if (*end) + usage(); + } + + init_key_cache(maria_key_cache,MARIA_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); + + if (!(info=maria_open(argv[0],2,HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) + { + error=my_errno; + goto err; + } + + *buf2=0; + aio->info=info; + + if ((inx >= info->s->base.keys) || + !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) + { + printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename); + goto err; + } + + maria_lock_database(info, F_EXTRA_LCK); + + info->lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + + while (!(error=maria_rnext(info,NULL,inx))) + { + keylen=*(info->lastkey); + + subkeys=ft_sintXkorr(info->lastkey+keylen+1); + if (subkeys >= 0) + weight=*(float*)&subkeys; + +#ifdef HAVE_SNPRINTF + snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); +#else + sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); +#endif + my_casedn_str(default_charset_info,buf); + total++; + lengths[keylen]++; + + if (count || stats) + { + doc_cnt++; + if (strcmp(buf, buf2)) + { + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen=0) + printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); + else + printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); + } + if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) + printf("%10ld\r",total); + } + maria_lock_database(info, F_UNLCK); + + if (count || stats) + { + doc_cnt++; + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen= total/2) + break; + } + printf("Total rows: %lu\nTotal words: %lu\n" + "Unique words: %lu\nLongest word: %lu chars (%s)\n" + "Median length: %u\n" + "Average global weight: %f\n" + "Most common word: %lu times, weight: %f (%s)\n", + (long) info->state->records, total, uniq, maxlen, buf_maxlen, + inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); + } + if (lstats) + { + count=0; + for (inx=0; inx<256; inx++) + { + count+=lengths[inx]; + if (count && lengths[inx]) + printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, + (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, + 100.0*count/total); + } + } + +err: + if (error && error != HA_ERR_END_OF_FILE) + printf("got error %d\n",my_errno); + if (info) + maria_close(info); + maria_end(); + return 0; +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch(optid) { + case 'd': + dump=1; + complain(count || query); + break; + case 's': + stats=1; + complain(query!=0); + break; + case 'c': + count= 1; + complain(dump || query); + break; + case 'l': + lstats=1; + complain(query!=0); + break; + case '?': + case 'h': + usage(); + } + return 0; +} + +#include + +static void usage() +{ + printf("Use: maria_ft_dump \n"); + my_print_help(my_long_options); + my_print_variables(my_long_options); + NETWARE_SET_SCREEN_MODE(1); + exit(1); +} + +#include + +static void complain(int val) /* Kinda assert :-) */ +{ + if (val) + { + printf("You cannot use these options together!\n"); + exit(1); + } +} diff --git a/storage/maria/maria_log.c b/storage/maria/maria_log.c new file mode 100644 index 00000000000..72a4d7e89d5 --- /dev/null +++ b/storage/maria/maria_log.c @@ -0,0 +1,848 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* write whats in isam.log */ + +#ifndef USE_MY_FUNC +#define USE_MY_FUNC +#endif + +#include "maria_def.h" +#include +#include +#ifdef HAVE_GETRUSAGE +#include +#endif + +#define FILENAME(A) (A ? A->show_name : "Unknown") + +struct file_info { + long process; + int filenr,id; + uint rnd; + my_string name,show_name,record; + MARIA_HA *isam; + bool closed,used; + ulong accessed; +}; + +struct test_if_open_param { + my_string name; + int max_id; +}; + +struct st_access_param +{ + ulong min_accessed; + struct file_info *found; +}; + +#define NO_FILEPOS (ulong) ~0L + +extern int main(int argc,char * *argv); +static void get_options(int *argc,char ***argv); +static int examine_log(my_string file_name,char **table_names); +static int read_string(IO_CACHE *file,gptr *to,uint length); +static int file_info_compare(void *cmp_arg, void *a,void *b); +static int test_if_open(struct file_info *key,element_count count, + struct test_if_open_param *param); +static void fix_blob_pointers(MARIA_HA *isam,byte *record); +static int test_when_accessed(struct file_info *key,element_count count, + struct st_access_param *access_param); +static void file_info_free(struct file_info *info); +static int close_some_file(TREE *tree); +static int reopen_closed_file(TREE *tree,struct file_info *file_info); +static int find_record_with_key(struct file_info *file_info,byte *record); +static void printf_log(const char *str,...); +static bool cmp_filename(struct file_info *file_info,my_string name); + +static uint verbose=0,update=0,test_info=0,max_files=0,re_open_count=0, + recover=0,prefix_remove=0,opt_processes=0; +static my_string log_filename=0,filepath=0,write_filename=0,record_pos_file=0; +static ulong com_count[10][3],number_of_commands=(ulong) ~0L, + isamlog_process; +static my_off_t isamlog_filepos,start_offset=0,record_pos= HA_OFFSET_ERROR; +static const char *command_name[]= +{"open","write","update","delete","close","extra","lock","re-open", + "delete-all", NullS}; + + +int main(int argc, char **argv) +{ + int error,i,first; + ulong total_count,total_error,total_recover; + MY_INIT(argv[0]); + + log_filename=maria_log_filename; + get_options(&argc,&argv); + maria_init(); + + /* Number of MARIA files we can have open at one time */ + max_files= (my_set_max_open_files(min(max_files,8))-6)/2; + if (update) + printf("Trying to %s MARIA files according to log '%s'\n", + (recover ? "recover" : "update"),log_filename); + error= examine_log(log_filename,argv); + if (update && ! error) + puts("Tables updated successfully"); + total_count=total_error=total_recover=0; + for (i=first=0 ; command_name[i] ; i++) + { + if (com_count[i][0]) + { + if (!first++) + { + if (verbose || update) + puts(""); + puts("Commands Used count Errors Recover errors"); + } + printf("%-12s%9ld%10ld%17ld\n",command_name[i],com_count[i][0], + com_count[i][1],com_count[i][2]); + total_count+=com_count[i][0]; + total_error+=com_count[i][1]; + total_recover+=com_count[i][2]; + } + } + if (total_count) + printf("%-12s%9ld%10ld%17ld\n","Total",total_count,total_error, + total_recover); + if (re_open_count) + printf("Had to do %d re-open because of too few possibly open files\n", + re_open_count); + VOID(maria_panic(HA_PANIC_CLOSE)); + my_free_open_file_info(); + maria_end(); + my_end(test_info ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error); + return 0; /* No compiler warning */ +} /* main */ + + +static void get_options(register int *argc, register char ***argv) +{ + int help,version; + const char *pos,*usage; + char option; + + help=0; + usage="Usage: %s [-?iruvDIV] [-c #] [-f #] [-F filepath/] [-o #] [-R file recordpos] [-w write_file] [log-filename [table ...]] \n"; + pos=""; + + while (--*argc > 0 && *(pos = *(++*argv)) == '-' ) { + while (*++pos) + { + version=0; + switch((option=*pos)) { + case '#': + DBUG_PUSH (++pos); + pos=" "; /* Skip rest of arg */ + break; + case 'c': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + number_of_commands=(ulong) atol(pos); + pos=" "; + break; + case 'u': + update=1; + break; + case 'f': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + max_files=(uint) atoi(pos); + pos=" "; + break; + case 'i': + test_info=1; + break; + case 'o': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + start_offset=(my_off_t) strtoll(pos,NULL,10); + pos=" "; + break; + case 'p': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + prefix_remove=atoi(pos); + break; + case 'r': + update=1; + recover++; + break; + case 'P': + opt_processes=1; + break; + case 'R': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + record_pos_file=(char*) pos; + if (!--*argc) + goto err; + record_pos=(my_off_t) strtoll(*(++*argv),NULL,10); + pos=" "; + break; + case 'v': + verbose++; + break; + case 'w': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + write_filename=(char*) pos; + pos=" "; + break; + case 'F': + if (! *++pos) + { + if (!--*argc) + goto err; + else + pos= *(++*argv); + } + filepath= (char*) pos; + pos=" "; + break; + case 'V': + version=1; + /* Fall through */ + case 'I': + case '?': +#include + printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE, + MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + if (version) + break; + puts("Write info about whats in a MARIA log file."); + printf("If no file name is given %s is used\n",log_filename); + puts(""); + printf(usage,my_progname); + puts(""); + puts("Options: -? or -I \"Info\" -V \"version\" -c \"do only # commands\""); + puts(" -f \"max open files\" -F \"filepath\" -i \"extra info\""); + puts(" -o \"offset\" -p # \"remove # components from path\""); + puts(" -r \"recover\" -R \"file recordposition\""); + puts(" -u \"update\" -v \"verbose\" -w \"write file\""); + puts(" -D \"maria compiled with DBUG\" -P \"processes\""); + puts("\nOne can give a second and a third '-v' for more verbose."); + puts("Normaly one does a update (-u)."); + puts("If a recover is done all writes and all possibly updates and deletes is done\nand errors are only counted."); + puts("If one gives table names as arguments only these tables will be updated\n"); + help=1; +#include + break; + default: + printf("illegal option: \"-%c\"\n",*pos); + break; + } + } + } + if (! *argc) + { + if (help) + exit(0); + (*argv)++; + } + if (*argc >= 1) + { + log_filename=(char*) pos; + (*argc)--; + (*argv)++; + } + return; + err: + VOID(fprintf(stderr,"option \"%c\" used without or with wrong argument\n", + option)); + exit(1); +} + + +static int examine_log(my_string file_name, char **table_names) +{ + uint command,result,files_open; + ulong access_time,length; + my_off_t filepos; + int lock_command,maria_result; + char isam_file_name[FN_REFLEN],llbuff[21],llbuff2[21]; + uchar head[20]; + gptr buff; + struct test_if_open_param open_param; + IO_CACHE cache; + File file; + FILE *write_file; + enum ha_extra_function extra_command; + TREE tree; + struct file_info file_info,*curr_file_info; + DBUG_ENTER("examine_log"); + + if ((file=my_open(file_name,O_RDONLY,MYF(MY_WME))) < 0) + DBUG_RETURN(1); + write_file=0; + if (write_filename) + { + if (!(write_file=my_fopen(write_filename,O_WRONLY,MYF(MY_WME)))) + { + my_close(file,MYF(0)); + DBUG_RETURN(1); + } + } + + init_io_cache(&cache,file,0,READ_CACHE,start_offset,0,MYF(0)); + bzero((gptr) com_count,sizeof(com_count)); + init_tree(&tree,0,0,sizeof(file_info),(qsort_cmp2) file_info_compare,1, + (tree_element_free) file_info_free, NULL); + VOID(init_key_cache(maria_key_cache,KEY_CACHE_BLOCK_SIZE,KEY_CACHE_SIZE, + 0, 0)); + + files_open=0; access_time=0; + while (access_time++ != number_of_commands && + !my_b_read(&cache,(byte*) head,9)) + { + isamlog_filepos=my_b_tell(&cache)-9L; + file_info.filenr= mi_uint2korr(head+1); + isamlog_process=file_info.process=(long) mi_uint4korr(head+3); + if (!opt_processes) + file_info.process=0; + result= mi_uint2korr(head+7); + if ((curr_file_info=(struct file_info*) tree_search(&tree, &file_info, + tree.custom_arg))) + { + curr_file_info->accessed=access_time; + if (update && curr_file_info->used && curr_file_info->closed) + { + if (reopen_closed_file(&tree,curr_file_info)) + { + command=sizeof(com_count)/sizeof(com_count[0][0])/3; + result=0; + goto com_err; + } + } + } + command=(uint) head[0]; + if (command < sizeof(com_count)/sizeof(com_count[0][0])/3 && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + { + com_count[command][0]++; + if (result) + com_count[command][1]++; + } + switch ((enum maria_log_commands) command) { + case MARIA_LOG_OPEN: + if (!table_names[0]) + { + com_count[command][0]--; /* Must be counted explicite */ + if (result) + com_count[command][1]--; + } + + if (curr_file_info) + printf("\nWarning: %s is opened with same process and filenumber\nMaybe you should use the -P option ?\n", + curr_file_info->show_name); + if (my_b_read(&cache,(byte*) head,2)) + goto err; + file_info.name=0; + file_info.show_name=0; + file_info.record=0; + if (read_string(&cache,(gptr*) &file_info.name, + (uint) mi_uint2korr(head))) + goto err; + { + uint i; + char *pos,*to; + + /* Fix if old DOS files to new format */ + for (pos=file_info.name; (pos=strchr(pos,'\\')) ; pos++) + *pos= '/'; + + pos=file_info.name; + for (i=0 ; i < prefix_remove ; i++) + { + char *next; + if (!(next=strchr(pos,'/'))) + break; + pos=next+1; + } + to=isam_file_name; + if (filepath) + to=convert_dirname(isam_file_name,filepath,NullS); + strmov(to,pos); + fn_ext(isam_file_name)[0]=0; /* Remove extension */ + } + open_param.name=file_info.name; + open_param.max_id=0; + VOID(tree_walk(&tree,(tree_walk_action) test_if_open,(void*) &open_param, + left_root_right)); + file_info.id=open_param.max_id+1; + /* + * In the line below +10 is added to accomodate '<' and '>' chars + * plus '\0' at the end, so that there is place for 7 digits. + * It is improbable that same table can have that many entries in + * the table cache. + * The additional space is needed for the sprintf commands two lines + * below. + */ + file_info.show_name=my_memdup(isam_file_name, + (uint) strlen(isam_file_name)+10, + MYF(MY_WME)); + if (file_info.id > 1) + sprintf(strend(file_info.show_name),"<%d>",file_info.id); + file_info.closed=1; + file_info.accessed=access_time; + file_info.used=1; + if (table_names[0]) + { + char **name; + file_info.used=0; + for (name=table_names ; *name ; name++) + { + if (!strcmp(*name,isam_file_name)) + file_info.used=1; /* Update/log only this */ + } + } + if (update && file_info.used) + { + if (files_open >= max_files) + { + if (close_some_file(&tree)) + goto com_err; + files_open--; + } + if (!(file_info.isam= maria_open(isam_file_name,O_RDWR, + HA_OPEN_WAIT_IF_LOCKED))) + goto com_err; + if (!(file_info.record=my_malloc(file_info.isam->s->base.reclength, + MYF(MY_WME)))) + goto end; + files_open++; + file_info.closed=0; + } + VOID(tree_insert(&tree, (gptr) &file_info, 0, tree.custom_arg)); + if (file_info.used) + { + if (verbose && !record_pos_file) + printf_log("%s: open -> %d",file_info.show_name, file_info.filenr); + com_count[command][0]++; + if (result) + com_count[command][1]++; + } + break; + case MARIA_LOG_CLOSE: + if (verbose && !record_pos_file && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + printf_log("%s: %s -> %d",FILENAME(curr_file_info), + command_name[command],result); + if (curr_file_info) + { + if (!curr_file_info->closed) + files_open--; + VOID(tree_delete(&tree, (gptr) curr_file_info, tree.custom_arg)); + } + break; + case MARIA_LOG_EXTRA: + if (my_b_read(&cache,(byte*) head,1)) + goto err; + extra_command=(enum ha_extra_function) head[0]; + if (verbose && !record_pos_file && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + printf_log("%s: %s(%d) -> %d",FILENAME(curr_file_info), + command_name[command], (int) extra_command,result); + if (update && curr_file_info && !curr_file_info->closed) + { + if (maria_extra(curr_file_info->isam, extra_command, 0) != (int) result) + { + fflush(stdout); + VOID(fprintf(stderr, + "Warning: error %d, expected %d on command %s at %s\n", + my_errno,result,command_name[command], + llstr(isamlog_filepos,llbuff))); + fflush(stderr); + } + } + break; + case MARIA_LOG_DELETE: + if (my_b_read(&cache,(byte*) head,8)) + goto err; + filepos=mi_sizekorr(head); + if (verbose && (!record_pos_file || + ((record_pos == filepos || record_pos == NO_FILEPOS) && + !cmp_filename(curr_file_info,record_pos_file))) && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + printf_log("%s: %s at %ld -> %d",FILENAME(curr_file_info), + command_name[command],(long) filepos,result); + if (update && curr_file_info && !curr_file_info->closed) + { + if (maria_rrnd(curr_file_info->isam,curr_file_info->record,filepos)) + { + if (!recover) + goto com_err; + if (verbose) + printf_log("error: Didn't find row to delete with maria_rrnd"); + com_count[command][2]++; /* Mark error */ + } + maria_result=maria_delete(curr_file_info->isam,curr_file_info->record); + if ((maria_result == 0 && result) || + (maria_result && (uint) my_errno != result)) + { + if (!recover) + goto com_err; + if (maria_result) + com_count[command][2]++; /* Mark error */ + if (verbose) + printf_log("error: Got result %d from maria_delete instead of %d", + maria_result, result); + } + } + break; + case MARIA_LOG_WRITE: + case MARIA_LOG_UPDATE: + if (my_b_read(&cache,(byte*) head,12)) + goto err; + filepos=mi_sizekorr(head); + length=mi_uint4korr(head+8); + buff=0; + if (read_string(&cache,&buff,(uint) length)) + goto err; + if ((!record_pos_file || + ((record_pos == filepos || record_pos == NO_FILEPOS) && + !cmp_filename(curr_file_info,record_pos_file))) && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + { + if (write_file && + (my_fwrite(write_file,buff,length,MYF(MY_WAIT_IF_FULL | MY_NABP)))) + goto end; + if (verbose) + printf_log("%s: %s at %ld, length=%ld -> %d", + FILENAME(curr_file_info), + command_name[command], filepos,length,result); + } + if (update && curr_file_info && !curr_file_info->closed) + { + if (curr_file_info->isam->s->base.blobs) + fix_blob_pointers(curr_file_info->isam,buff); + if ((enum maria_log_commands) command == MARIA_LOG_UPDATE) + { + if (maria_rrnd(curr_file_info->isam,curr_file_info->record,filepos)) + { + if (!recover) + { + result=0; + goto com_err; + } + if (verbose) + printf_log("error: Didn't find row to update with maria_rrnd"); + if (recover == 1 || result || + find_record_with_key(curr_file_info,buff)) + { + com_count[command][2]++; /* Mark error */ + break; + } + } + maria_result=maria_update(curr_file_info->isam,curr_file_info->record, + buff); + if ((maria_result == 0 && result) || + (maria_result && (uint) my_errno != result)) + { + if (!recover) + goto com_err; + if (verbose) + printf_log("error: Got result %d from maria_update instead of %d", + maria_result, result); + if (maria_result) + com_count[command][2]++; /* Mark error */ + } + } + else + { + maria_result=maria_write(curr_file_info->isam,buff); + if ((maria_result == 0 && result) || + (maria_result && (uint) my_errno != result)) + { + if (!recover) + goto com_err; + if (verbose) + printf_log("error: Got result %d from maria_write instead of %d", + maria_result, result); + if (maria_result) + com_count[command][2]++; /* Mark error */ + } + if (!recover && filepos != curr_file_info->isam->lastpos) + { + printf("error: Wrote at position: %s, should have been %s", + llstr(curr_file_info->isam->lastpos,llbuff), + llstr(filepos,llbuff2)); + goto end; + } + } + } + my_free(buff,MYF(0)); + break; + case MARIA_LOG_LOCK: + if (my_b_read(&cache,(byte*) head,sizeof(lock_command))) + goto err; + memcpy_fixed(&lock_command,head,sizeof(lock_command)); + if (verbose && !record_pos_file && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + printf_log("%s: %s(%d) -> %d\n",FILENAME(curr_file_info), + command_name[command],lock_command,result); + if (update && curr_file_info && !curr_file_info->closed) + { + if (maria_lock_database(curr_file_info->isam,lock_command) != + (int) result) + goto com_err; + } + break; + case MARIA_LOG_DELETE_ALL: + if (verbose && !record_pos_file && + (!table_names[0] || (curr_file_info && curr_file_info->used))) + printf_log("%s: %s -> %d\n",FILENAME(curr_file_info), + command_name[command],result); + break; + default: + fflush(stdout); + VOID(fprintf(stderr, + "Error: found unknown command %d in logfile, aborted\n", + command)); + fflush(stderr); + goto end; + } + } + end_key_cache(maria_key_cache,1); + delete_tree(&tree); + VOID(end_io_cache(&cache)); + VOID(my_close(file,MYF(0))); + if (write_file && my_fclose(write_file,MYF(MY_WME))) + DBUG_RETURN(1); + DBUG_RETURN(0); + + err: + fflush(stdout); + VOID(fprintf(stderr,"Got error %d when reading from logfile\n",my_errno)); + fflush(stderr); + goto end; + com_err: + fflush(stdout); + VOID(fprintf(stderr,"Got error %d, expected %d on command %s at %s\n", + my_errno,result,command_name[command], + llstr(isamlog_filepos,llbuff))); + fflush(stderr); + end: + end_key_cache(maria_key_cache, 1); + delete_tree(&tree); + VOID(end_io_cache(&cache)); + VOID(my_close(file,MYF(0))); + if (write_file) + VOID(my_fclose(write_file,MYF(MY_WME))); + DBUG_RETURN(1); +} + + +static int read_string(IO_CACHE *file, register gptr *to, register uint length) +{ + DBUG_ENTER("read_string"); + + if (*to) + my_free((gptr) *to,MYF(0)); + if (!(*to= (gptr) my_malloc(length+1,MYF(MY_WME))) || + my_b_read(file,(byte*) *to,length)) + { + if (*to) + my_free(*to,MYF(0)); + *to= 0; + DBUG_RETURN(1); + } + *((char*) *to+length)= '\0'; + DBUG_RETURN (0); +} /* read_string */ + + +static int file_info_compare(void* cmp_arg __attribute__((unused)), + void *a, void *b) +{ + long lint; + + if ((lint=((struct file_info*) a)->process - + ((struct file_info*) b)->process)) + return lint < 0L ? -1 : 1; + return ((struct file_info*) a)->filenr - ((struct file_info*) b)->filenr; +} + + /* ARGSUSED */ + +static int test_if_open (struct file_info *key, + element_count count __attribute__((unused)), + struct test_if_open_param *param) +{ + if (!strcmp(key->name,param->name) && key->id > param->max_id) + param->max_id=key->id; + return 0; +} + + +static void fix_blob_pointers(MARIA_HA *info, byte *record) +{ + byte *pos; + MARIA_BLOB *blob,*end; + + pos=record+info->s->base.reclength; + for (end=info->blobs+info->s->base.blobs, blob= info->blobs; + blob != end ; + blob++) + { + memcpy_fixed(record+blob->offset+blob->pack_length,&pos,sizeof(char*)); + pos+= _ma_calc_blob_length(blob->pack_length,record+blob->offset); + } +} + + /* close the file with hasn't been accessed for the longest time */ + /* ARGSUSED */ + +static int test_when_accessed (struct file_info *key, + element_count count __attribute__((unused)), + struct st_access_param *access_param) +{ + if (key->accessed < access_param->min_accessed && ! key->closed) + { + access_param->min_accessed=key->accessed; + access_param->found=key; + } + return 0; +} + + +static void file_info_free(struct file_info *fileinfo) +{ + DBUG_ENTER("file_info_free"); + if (update) + { + if (!fileinfo->closed) + VOID(maria_close(fileinfo->isam)); + if (fileinfo->record) + my_free(fileinfo->record,MYF(0)); + } + my_free(fileinfo->name,MYF(0)); + my_free(fileinfo->show_name,MYF(0)); + DBUG_VOID_RETURN; +} + + + +static int close_some_file(TREE *tree) +{ + struct st_access_param access_param; + + access_param.min_accessed=LONG_MAX; + access_param.found=0; + + VOID(tree_walk(tree,(tree_walk_action) test_when_accessed, + (void*) &access_param,left_root_right)); + if (!access_param.found) + return 1; /* No open file that is possibly to close */ + if (maria_close(access_param.found->isam)) + return 1; + access_param.found->closed=1; + return 0; +} + + +static int reopen_closed_file(TREE *tree, struct file_info *fileinfo) +{ + char name[FN_REFLEN]; + if (close_some_file(tree)) + return 1; /* No file to close */ + strmov(name,fileinfo->show_name); + if (fileinfo->id > 1) + *strrchr(name,'<')='\0'; /* Remove "" */ + + if (!(fileinfo->isam= maria_open(name,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) + return 1; + fileinfo->closed=0; + re_open_count++; + return 0; +} + + /* Try to find record with uniq key */ + +static int find_record_with_key(struct file_info *file_info, byte *record) +{ + uint key; + MARIA_HA *info=file_info->isam; + uchar tmp_key[HA_MAX_KEY_BUFF]; + + for (key=0 ; key < info->s->base.keys ; key++) + { + if (maria_is_key_active(info->s->state.key_map, key) && + info->s->keyinfo[key].flag & HA_NOSAME) + { + VOID(_ma_make_key(info,key,tmp_key,record,0L)); + return maria_rkey(info,file_info->record,(int) key,(char*) tmp_key,0, + HA_READ_KEY_EXACT); + } + } + return 1; +} + + +static void printf_log(const char *format,...) +{ + char llbuff[21]; + va_list args; + va_start(args,format); + if (verbose > 2) + printf("%9s:",llstr(isamlog_filepos,llbuff)); + if (verbose > 1) + printf("%5ld ",isamlog_process); /* Write process number */ + (void) vprintf((char*) format,args); + putchar('\n'); + va_end(args); +} + + +static bool cmp_filename(struct file_info *file_info, my_string name) +{ + if (!file_info) + return 1; + return strcmp(file_info->name,name) ? 1 : 0; +} diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c new file mode 100644 index 00000000000..2a83bbb0f3f --- /dev/null +++ b/storage/maria/maria_pack.c @@ -0,0 +1,3202 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Pack MARIA file */ + +#ifndef USE_MY_FUNC +#define USE_MY_FUNC /* We need at least my_malloc */ +#endif + +#include "maria_def.h" +#include +#include +#include "mysys_err.h" +#ifdef MSDOS +#include +#endif +#ifndef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */ +#endif +#include +#include + +#if SIZEOF_LONG_LONG > 4 +#define BITS_SAVED 64 +#else +#define BITS_SAVED 32 +#endif + +#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */ +#define HEAD_LENGTH 32 +#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */ + +#define DATA_TMP_EXT ".TMD" +#define OLD_EXT ".OLD" +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE + +struct st_file_buffer { + File file; + uchar *buffer,*pos,*end; + my_off_t pos_in_file; + int bits; + ulonglong bitbucket; +}; + +struct st_huff_tree; +struct st_huff_element; + +typedef struct st_huff_counts { + uint field_length,max_zero_fill; + uint pack_type; + uint max_end_space,max_pre_space,length_bits,min_space; + ulong max_length; + enum en_fieldtype field_type; + struct st_huff_tree *tree; /* Tree for field */ + my_off_t counts[256]; + my_off_t end_space[8]; + my_off_t pre_space[8]; + my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed; + TREE int_tree; /* Tree for detecting distinct column values. */ + byte *tree_buff; /* Column values, 'field_length' each. */ + byte *tree_pos; /* Points to end of column values in 'tree_buff'. */ +} HUFF_COUNTS; + +typedef struct st_huff_element HUFF_ELEMENT; + +/* + WARNING: It is crucial for the optimizations in calc_packed_length() + that 'count' is the first element of 'HUFF_ELEMENT'. +*/ +struct st_huff_element { + my_off_t count; + union un_element { + struct st_nod { + HUFF_ELEMENT *left,*right; + } nod; + struct st_leaf { + HUFF_ELEMENT *null; + uint element_nr; /* Number of element */ + } leaf; + } a; +}; + + +typedef struct st_huff_tree { + HUFF_ELEMENT *root,*element_buffer; + HUFF_COUNTS *counts; + uint tree_number; + uint elements; + my_off_t bytes_packed; + uint tree_pack_length; + uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; + ulonglong *code; + uchar *code_len; +} HUFF_TREE; + + +typedef struct st_isam_mrg { + MARIA_HA **file,**current,**end; + uint free_file; + uint count; + uint min_pack_length; /* Theese is used by packed data */ + uint max_pack_length; + uint ref_length; + uint max_blob_length; + my_off_t records; + /* true if at least one source file has at least one disabled index */ + my_bool src_file_has_indexes_disabled; +} PACK_MRG_INFO; + + +extern int main(int argc,char * *argv); +static void get_options(int *argc,char ***argv); +static MARIA_HA *open_isam_file(char *name,int mode); +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count); +static int compress(PACK_MRG_INFO *file,char *join_name); +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records); +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, + uint trees, + HUFF_COUNTS *huff_counts, + uint fields); +static int compare_tree(void* cmp_arg __attribute__((unused)), + const uchar *s,const uchar *t); +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts); +static void check_counts(HUFF_COUNTS *huff_counts,uint trees, + my_off_t records); +static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records, + uint max_space_length,my_off_t *space_counts, + my_off_t tot_space_count, + enum en_fieldtype field_type); +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts); +static int compare_huff_elements(void *not_used, byte *a,byte *b); +static int save_counts_in_queue(byte *key,element_count count, + HUFF_TREE *tree); +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag); +static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element,uint size, + ulonglong code); +static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, + my_off_t tot_elements,my_off_t filelength); +static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); +static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees); +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint *offset); +static uint max_bit(uint value); +static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts); +static char *make_new_name(char *new_name,char *old_name); +static char *make_old_name(char *new_name,char *old_name); +static void init_file_buffer(File file,pbool read_buffer); +static int flush_buffer(ulong neaded_length); +static void end_file_buffer(void); +static void write_bits(ulonglong value, uint bits); +static void flush_bits(void); +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc); +static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length, + ha_checksum crc); +static int mrg_close(PACK_MRG_INFO *mrg); +static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf); +static void mrg_reset(PACK_MRG_INFO *mrg); +#if !defined(DBUG_OFF) +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count); +static int fakecmp(my_off_t **count1, my_off_t **count2); +#endif + + +static int error_on_write=0,test_only=0,verbose=0,silent=0, + write_loop=0,force_pack=0, isamchk_neaded=0; +static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; +static my_bool backup, opt_wait; +/* + tree_buff_length is somewhat arbitrary. The bigger it is the better + the chance to win in terms of compression factor. On the other hand, + this table becomes part of the compressed file header. And its length + is coded with 16 bits in the header. Hence the limit is 2**16 - 1. +*/ +static uint tree_buff_length= 65536 - MALLOC_OVERHEAD; +static char tmp_dir[FN_REFLEN]={0},*join_table; +static my_off_t intervall_length; +static ha_checksum glob_crc; +static struct st_file_buffer file_buffer; +static QUEUE queue; +static HUFF_COUNTS *global_count; +static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +static const char *load_default_groups[]= { "mariapack",0 }; + + /* The main program */ + +int main(int argc, char **argv) +{ + int error,ok; + PACK_MRG_INFO merge; + char **default_argv; + MY_INIT(argv[0]); + + load_defaults("my",load_default_groups,&argc,&argv); + default_argv= argv; + get_options(&argc,&argv); + maria_init(); + + error=ok=isamchk_neaded=0; + if (join_table) + { /* Join files into one */ + if (open_isam_files(&merge,argv,(uint) argc) || + compress(&merge,join_table)) + error=1; + } + else while (argc--) + { + MARIA_HA *isam_file; + if (!(isam_file=open_isam_file(*argv++,O_RDWR))) + error=1; + else + { + merge.file= &isam_file; + merge.current=0; + merge.free_file=0; + merge.count=1; + if (compress(&merge,0)) + error=1; + else + ok=1; + } + } + if (ok && isamchk_neaded && !silent) + puts("Remember to run mariachk -rq on compressed tables"); + VOID(fflush(stdout)); + VOID(fflush(stderr)); + free_defaults(default_argv); + maria_end(); + my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error ? 2 : 0); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} + +enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE}; + +static struct my_option my_long_options[] = +{ +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"backup", 'b', "Make a backup of the table as table_name.OLD.", + (gptr*) &backup, (gptr*) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR_MP, + "Directory where character sets are.", (gptr*) &charsets_dir, + (gptr*) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Force packing of table even if it gets bigger or if tempfile exists.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"join", 'j', + "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.", + (gptr*) &join_table, (gptr*) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + 0, 0, 0}, + {"help", '?', "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Be more silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 'T', "Use temporary directory to store temporary table.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"test", 't', "Don't pack table, only test packing it.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Output version information and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', "Wait and retry if table is in use.", (gptr*) &opt_wait, + (gptr*) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +#include + +static void print_version(void) +{ + VOID(printf("%s Ver 1.23 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2002 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); + puts("and you are welcome to modify and redistribute it under the GPL license\n"); + + puts("Pack a MARIA-table to take much less space."); + puts("Keys are not updated, you must run mariachk -rq on the datafile"); + puts("afterwards to update the keys."); + puts("You should give the .MYI file as the filename argument."); + + VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); + my_print_help(my_long_options); + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + uint length; + + switch(optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'f': + force_pack= 1; + tmpfile_createflag= O_RDWR | O_TRUNC; + break; + case 's': + write_loop= verbose= 0; + silent= 1; + break; + case 't': + test_only= 1; + /* Avoid to reset 'verbose' if it was already set > 1. */ + if (! verbose) + verbose= 1; + break; + case 'T': + length= (uint) (strmov(tmp_dir, argument) - tmp_dir); + if (length != dirname_length(tmp_dir)) + { + tmp_dir[length]=FN_LIBCHAR; + tmp_dir[length+1]=0; + } + break; + case 'v': + verbose++; /* Allow for selecting the level of verbosity. */ + silent= 0; + break; + case '#': + DBUG_PUSH(argument ? argument : "d:t:o"); + break; + case 'V': + print_version(); + exit(0); + case 'I': + case '?': + usage(); + exit(0); + } + return 0; +} + + /* reads options */ + /* Initiates DEBUG - but no debugging here ! */ + +static void get_options(int *argc,char ***argv) +{ + int ho_error; + + my_progname= argv[0][0]; + if (isatty(fileno(stdout))) + write_loop=1; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!*argc) + { + usage(); + exit(1); + } + if (join_table) + { + backup=0; /* Not needed */ + tmp_dir[0]=0; + } + return; +} + + +static MARIA_HA *open_isam_file(char *name,int mode) +{ + MARIA_HA *isam_file; + MARIA_SHARE *share; + DBUG_ENTER("open_isam_file"); + + if (!(isam_file=maria_open(name,mode, + (opt_wait ? HA_OPEN_WAIT_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED)))) + { + VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno)); + DBUG_RETURN(0); + } + share=isam_file->s; + if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table) + { + if (!force_pack) + { + VOID(fprintf(stderr, "%s is already compressed\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + if (verbose) + puts("Recompressing already compressed table"); + share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ + } + if (! force_pack && share->state.state.records != 0 && + (share->state.state.records <= 1 || + share->state.state.data_file_length < 1024)) + { + VOID(fprintf(stderr, "%s is too small to compress\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + VOID(maria_lock_database(isam_file,F_WRLCK)); + DBUG_RETURN(isam_file); +} + + +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) +{ + uint i,j; + mrg->count=0; + mrg->current=0; + mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE)); + mrg->free_file=1; + mrg->src_file_has_indexes_disabled= 0; + for (i=0; i < count ; i++) + { + if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY))) + goto error; + + mrg->src_file_has_indexes_disabled|= + ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map, + mrg->file[i]->s->base.keys); + } + /* Check that files are identical */ + for (j=0 ; j < count-1 ; j++) + { + MARIA_COLUMNDEF *m1,*m2,*end; + if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength || + mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields) + goto diff_file; + m1=mrg->file[j]->s->rec; + end=m1+mrg->file[j]->s->base.fields; + m2=mrg->file[j+1]->s->rec; + for ( ; m1 != end ; m1++,m2++) + { + if (m1->type != m2->type || m1->length != m2->length) + goto diff_file; + } + } + mrg->count=count; + return 0; + + diff_file: + VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n", + my_progname, names[j], names[j+1])); + error: + while (i--) + maria_close(mrg->file[i]); + my_free((gptr) mrg->file,MYF(0)); + return 1; +} + + +static int compress(PACK_MRG_INFO *mrg,char *result_table) +{ + int error; + File new_file,join_isam_file; + MARIA_HA *isam_file; + MARIA_SHARE *share; + char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN]; + uint i,header_length,fields,trees,used_trees; + my_off_t old_length,new_length,tot_elements; + HUFF_COUNTS *huff_counts; + HUFF_TREE *huff_trees; + DBUG_ENTER("compress"); + + isam_file=mrg->file[0]; /* Take this as an example */ + share=isam_file->s; + new_file=join_isam_file= -1; + trees=fields=0; + huff_trees=0; + huff_counts=0; + + /* Create temporary or join file */ + + if (backup) + VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2)); + else + VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2+4+16)); + if (!test_only && result_table) + { + /* Make a new indexfile based on first file in list */ + uint length; + char *buff; + strmov(org_name,result_table); /* Fix error messages */ + VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2)); + if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) + < 0) + goto err; + length=(uint) share->base.keystart; + if (!(buff=my_malloc(length,MYF(MY_WME)))) + goto err; + if (my_pread(share->kfile,buff,length,0L,MYF(MY_WME | MY_NABP)) || + my_write(join_isam_file,buff,length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + my_free(buff,MYF(0)); + goto err; + } + my_free(buff,MYF(0)); + VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2)); + } + else if (!tmp_dir[0]) + VOID(make_new_name(new_name,org_name)); + else + VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4)); + if (!test_only && + (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0) + goto err; + + /* Start calculating statistics */ + + mrg->records=0; + for (i=0 ; i < mrg->count ; i++) + mrg->records+=mrg->file[i]->s->state.state.records; + + DBUG_PRINT("info", ("Compressing %s: (%lu records)", + result_table ? new_name : org_name, + (ulong) mrg->records)); + if (write_loop || verbose) + { + VOID(printf("Compressing %s: (%lu records)\n", + result_table ? new_name : org_name, (ulong) mrg->records)); + } + trees=fields=share->base.fields; + huff_counts=init_huff_count(isam_file,mrg->records); + QUICK_SAFEMALLOC; + + /* + Read the whole data file(s) for statistics. + */ + DBUG_PRINT("info", ("- Calculating statistics")); + if (write_loop || verbose) + VOID(printf("- Calculating statistics\n")); + if (get_statistic(mrg,huff_counts)) + goto err; + NORMAL_SAFEMALLOC; + old_length=0; + for (i=0; i < mrg->count ; i++) + old_length+= (mrg->file[i]->s->state.state.data_file_length - + mrg->file[i]->s->state.state.empty); + + /* + Create a global priority queue in preparation for making + temporary Huffman trees. + */ + if (init_queue(&queue,256,0,0,compare_huff_elements,0)) + goto err; + + /* + Check each column if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress. + */ + check_counts(huff_counts,fields,mrg->records); + + /* + Build a Huffman tree for each column. + */ + huff_trees=make_huff_trees(huff_counts,trees); + + /* + If the packed lengths of combined columns is less then the sum of + the non-combined columns, then create common Huffman trees for them. + We do this only for byte compressed columns, not for distinct values + compressed columns. + */ + if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0) + goto err; + + /* + Assign codes to all byte or column values. + */ + if (make_huff_decode_table(huff_trees,fields)) + goto err; + + /* Prepare a file buffer. */ + init_file_buffer(new_file,0); + + /* + Reserve space in the target file for the fixed compressed file header. + */ + file_buffer.pos_in_file=HEAD_LENGTH; + if (! test_only) + VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0))); + + /* + Write field infos: field type, pack type, length bits, tree number. + */ + write_field_info(huff_counts,fields,used_trees); + + /* + Write decode trees. + */ + if (!(tot_elements=write_huff_tree(huff_trees,trees))) + goto err; + + /* + Calculate the total length of the compression info header. + This includes the fixed compressed file header, the column compression + type descriptions, and the decode trees. + */ + header_length=(uint) file_buffer.pos_in_file+ + (uint) (file_buffer.pos-file_buffer.buffer); + + /* + Compress the source file into the target file. + */ + DBUG_PRINT("info", ("- Compressing file")); + if (write_loop || verbose) + VOID(printf("- Compressing file\n")); + error=compress_isam_file(mrg,huff_counts); + new_length=file_buffer.pos_in_file; + if (!error && !test_only) + { + char buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */ + bzero(buff,sizeof(buff)); + error=my_write(file_buffer.file,buff,sizeof(buff), + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; + } + + /* + Write the fixed compressed file header. + */ + if (!error) + error=write_header(mrg,header_length,used_trees,tot_elements, + new_length); + + /* Flush the file buffer. */ + end_file_buffer(); + + /* Display statistics. */ + DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", + mrg->min_pack_length, mrg->max_pack_length, + (ulong) (mrg->records ? (new_length/mrg->records) : 0))); + if (verbose && mrg->records) + VOID(printf("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", mrg->min_pack_length, + mrg->max_pack_length, (ulong) (new_length/mrg->records))); + + /* Close source and target file. */ + if (!test_only) + { + error|=my_close(new_file,MYF(MY_WME)); + if (!result_table) + { + error|=my_close(isam_file->dfile,MYF(MY_WME)); + isam_file->dfile= -1; /* Tell maria_close file is closed */ + } + } + + /* Cleanup. */ + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (! test_only && ! error) + { + if (result_table) + { + error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc); + } + else + { + if (backup) + { + if (my_rename(org_name,make_old_name(temp_name,isam_file->filename), + MYF(MY_WME))) + error=1; + else + { + if (tmp_dir[0]) + error=my_copy(new_name,org_name,MYF(MY_WME)); + else + error=my_rename(new_name,org_name,MYF(MY_WME)); + if (!error) + { + VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME))); + if (tmp_dir[0]) + VOID(my_delete(new_name,MYF(MY_WME))); + } + } + } + else + { + if (tmp_dir[0]) + { + error=my_copy(new_name,org_name, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME)); + if (!error) + VOID(my_delete(new_name,MYF(MY_WME))); + } + else + error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME)); + } + if (! error) + error=save_state(isam_file,mrg,new_length,glob_crc); + } + } + error|=mrg_close(mrg); + if (join_isam_file >= 0) + error|=my_close(join_isam_file,MYF(MY_WME)); + if (error) + { + VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name)); + VOID(my_delete(new_name,MYF(MY_WME))); + DBUG_RETURN(-1); + } + if (write_loop || verbose) + { + if (old_length) + VOID(printf("%.4g%% \n", + (((longlong) (old_length - new_length)) * 100.0 / + (longlong) old_length))); + else + puts("Empty file saved in compressed format"); + } + DBUG_RETURN(0); + + err: + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (new_file >= 0) + VOID(my_close(new_file,MYF(0))); + if (join_isam_file >= 0) + VOID(my_close(join_isam_file,MYF(0))); + mrg_close(mrg); + VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name)); + DBUG_RETURN(-1); +} + + /* Init a huff_count-struct for each field and init it */ + +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records) +{ + reg2 uint i; + reg1 HUFF_COUNTS *count; + if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields* + sizeof(HUFF_COUNTS), + MYF(MY_ZEROFILL | MY_WME)))) + { + for (i=0 ; i < info->s->base.fields ; i++) + { + enum en_fieldtype type; + count[i].field_length=info->s->rec[i].length; + type= count[i].field_type= (enum en_fieldtype) info->s->rec[i].type; + if (type == FIELD_INTERVALL || + type == FIELD_CONSTANT || + type == FIELD_ZERO) + type = FIELD_NORMAL; + if (count[i].field_length <= 8 && + (type == FIELD_NORMAL || + type == FIELD_SKIP_ZERO)) + count[i].max_zero_fill= count[i].field_length; + /* + For every column initialize a tree, which is used to detect distinct + column values. 'int_tree' works together with 'tree_buff' and + 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'. + This is accomplished by '-1' as the element size. + */ + init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, + NULL); + if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) + count[i].tree_pos=count[i].tree_buff = + my_malloc(count[i].field_length > 1 ? tree_buff_length : 2, + MYF(MY_WME)); + } + } + return count; +} + + + /* Free memory used by counts and trees */ + +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees, + HUFF_COUNTS *huff_counts, + uint fields) +{ + register uint i; + + if (huff_trees) + { + for (i=0 ; i < trees ; i++) + { + if (huff_trees[i].element_buffer) + my_free((gptr) huff_trees[i].element_buffer,MYF(0)); + if (huff_trees[i].code) + my_free((gptr) huff_trees[i].code,MYF(0)); + } + my_free((gptr) huff_trees,MYF(0)); + } + if (huff_counts) + { + for (i=0 ; i < fields ; i++) + { + if (huff_counts[i].tree_buff) + { + my_free((gptr) huff_counts[i].tree_buff,MYF(0)); + delete_tree(&huff_counts[i].int_tree); + } + } + my_free((gptr) huff_counts,MYF(0)); + } + delete_queue(&queue); /* This is safe to free */ + return; +} + + /* Read through old file and gather some statistics */ + +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) +{ + int error; + uint length; + ulong reclength,max_blob_length; + byte *record,*pos,*next_pos,*end_pos,*start_pos; + ha_rows record_count; + my_bool static_row_size; + HUFF_COUNTS *count,*end_count; + TREE_ELEMENT *element; + DBUG_ENTER("get_statistic"); + + reclength=mrg->file[0]->s->base.reclength; + record=(byte*) my_alloca(reclength); + end_count=huff_counts+mrg->file[0]->s->base.fields; + record_count=0; glob_crc=0; + max_blob_length=0; + + /* Check how to calculate checksum */ + static_row_size=1; + for (count=huff_counts ; count < end_count ; count++) + { + if (count->field_type == FIELD_BLOB || + count->field_type == FIELD_VARCHAR) + { + static_row_size=0; + break; + } + } + + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + /* glob_crc is a checksum over all bytes of all records. */ + if (static_row_size) + glob_crc+=_ma_static_checksum(mrg->file[0],record); + else + glob_crc+=_ma_checksum(mrg->file[0],record); + + /* Count the incidence of values separately for every column. */ + for (pos=record,count=huff_counts ; + count < end_count ; + count++, + pos=next_pos) + { + next_pos=end_pos=(start_pos=pos)+count->field_length; + + /* + Put the whole column value in a tree if there is room for it. + 'int_tree' is used to quickly check for duplicate values. + 'tree_buff' collects as many distinct column values as + possible. If the field length is > 1, it is tree_buff_length, + else 2 bytes. Each value is 'field_length' bytes big. If there + are more distinct column values than fit into the buffer, we + give up with this tree. BLOBs and VARCHARs do not have a + tree_buff as it can only be used with fixed length columns. + For the special case of field length == 1, we handle only the + case that there is only one distinct value in the table(s). + Otherwise, we can have a maximum of 256 distinct values. This + is then handled by the normal Huffman tree build. + + Another limit for collecting distinct column values is the + number of values itself. Since we would need to build a + Huffman tree for the values, we are limited by the 'IS_OFFSET' + constant. This constant expresses a bit which is used to + determine if a tree element holds a final value or an offset + to a child element. Hence, all values and offsets need to be + smaller than 'IS_OFFSET'. A tree element is implemented with + two integer values, one for the left branch and one for the + right branch. For the extreme case that the first element + points to the last element, the number of integers in the tree + must be less or equal to IS_OFFSET. So the number of elements + must be less or equal to IS_OFFSET / 2. + + WARNING: At first, we insert a pointer into the record buffer + as the key for the tree. If we got a new distinct value, which + is really inserted into the tree, instead of being counted + only, we will copy the column value from the record buffer to + 'tree_buff' and adjust the key pointer of the tree accordingly. + */ + if (count->tree_buff) + { + global_count=count; + if (!(element=tree_insert(&count->int_tree,pos, 0, + count->int_tree.custom_arg)) || + (element->count == 1 && + (count->tree_buff + tree_buff_length < + count->tree_pos + count->field_length)) || + (count->int_tree.elements_in_tree > IS_OFFSET / 2) || + (count->field_length == 1 && + count->int_tree.elements_in_tree > 1)) + { + delete_tree(&count->int_tree); + my_free(count->tree_buff,MYF(0)); + count->tree_buff=0; + } + else + { + /* + If tree_insert() succeeds, it either creates a new element + or increments the counter of an existing element. + */ + if (element->count == 1) + { + /* Copy the new column value into 'tree_buff'. */ + memcpy(count->tree_pos,pos,(size_t) count->field_length); + /* Adjust the key pointer in the tree. */ + tree_set_pointer(element,count->tree_pos); + /* Point behind the last column value so far. */ + count->tree_pos+=count->field_length; + } + } + } + + /* Save character counters and space-counts and zero-field-counts */ + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ENDSPACE) + { + /* Ignore trailing space. */ + for ( ; end_pos > pos ; end_pos--) + if (end_pos[-1] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all trailing spaces and the number of + short trailing spaces. Remember the longest trailing space. + */ + length= (uint) (next_pos-end_pos); + count->tot_end_space+=length; + if (length < 8) + count->end_space[length]++; + if (count->max_end_space < length) + count->max_end_space = length; + } + + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_PRESPACE) + { + /* Ignore leading space. */ + for (pos=start_pos; pos < end_pos ; pos++) + if (pos[0] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all leading spaces and the number of + short leading spaces. Remember the longest leading space. + */ + length= (uint) (pos-start_pos); + count->tot_pre_space+=length; + if (length < 8) + count->pre_space[length]++; + if (count->max_pre_space < length) + count->max_pre_space = length; + } + + /* Calculate pos, end_pos, and max_length for variable length fields. */ + if (count->field_type == FIELD_BLOB) + { + uint field_length=count->field_length -maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(field_length, start_pos); + memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*)); + end_pos=pos+blob_length; + tot_blob_length+=blob_length; + set_if_bigger(count->max_length,blob_length); + } + else if (count->field_type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + pos= start_pos+pack_length; + end_pos= pos+length; + set_if_bigger(count->max_length,length); + } + + /* Evaluate 'max_zero_fill' for short fields. */ + if (count->field_length <= 8 && + (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ZERO)) + { + uint i; + /* Zero fields are just counted. Go to the next record. */ + if (!memcmp((byte*) start_pos,zero_string,count->field_length)) + { + count->zero_fields++; + continue; + } + /* + max_zero_fill starts with field_length. It is decreased every + time a shorter "zero trailer" is found. It is set to zero when + an empty field is found (see above). This suggests that the + variable should be called 'min_zero_fill'. + */ + for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ; + i++) ; + if (i < count->max_zero_fill) + count->max_zero_fill=i; + } + + /* Ignore zero fields and check fields. */ + if (count->field_type == FIELD_ZERO || + count->field_type == FIELD_CHECK) + continue; + + /* + Count the incidence of every byte value in the + significant field value. + */ + for ( ; pos < end_pos ; pos++) + count->counts[(uchar) *pos]++; + + /* Step to next field. */ + } + + if (tot_blob_length > max_blob_length) + max_blob_length=tot_blob_length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + { + VOID(fprintf(stderr, "Got error %d while reading rows", error)); + break; + } + + /* Step to next record. */ + } + if (write_loop) + { + VOID(printf(" \r")); + VOID(fflush(stdout)); + } + + /* + If --debug=d,fakebigcodes is set, fake the counts to get big Huffman + codes. + */ + DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count);); + + DBUG_PRINT("info", ("Found the following number of incidents " + "of the byte codes:")); + if (verbose >= 2) + VOID(printf("Found the following number of incidents " + "of the byte codes:\n")); + for (count= huff_counts ; count < end_count; count++) + { + uint idx; + my_off_t total_count; + char llbuf[32]; + + DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1)); + if (verbose >= 2) + VOID(printf("column: %3u\n", count - huff_counts + 1)); + if (count->tree_buff) + { + DBUG_PRINT("info", ("number of distinct values: %u", + (count->tree_pos - count->tree_buff) / + count->field_length)); + if (verbose >= 2) + VOID(printf("number of distinct values: %u\n", + (count->tree_pos - count->tree_buff) / + count->field_length)); + } + total_count= 0; + for (idx= 0; idx < 256; idx++) + { + if (count->counts[idx]) + { + total_count+= count->counts[idx]; + DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx, + llstr((longlong) count->counts[idx], llbuf))); + if (verbose >= 2) + VOID(printf("counts[0x%02x]: %12s\n", idx, + llstr((longlong) count->counts[idx], llbuf))); + } + } + DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count, + llbuf))); + if ((verbose >= 2) && total_count) + { + VOID(printf("total: %12s\n", + llstr((longlong) total_count, llbuf))); + } + } + + mrg->records=record_count; + mrg->max_blob_length=max_blob_length; + my_afree((gptr) record); + DBUG_RETURN(error != HA_ERR_END_OF_FILE); +} + +static int compare_huff_elements(void *not_used __attribute__((unused)), + byte *a, byte *b) +{ + return *((my_off_t*) a) < *((my_off_t*) b) ? -1 : + (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1); +} + + /* Check each tree if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress */ + +static void check_counts(HUFF_COUNTS *huff_counts, uint trees, + my_off_t records) +{ + uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count]; + my_off_t old_length,new_length,length; + DBUG_ENTER("check_counts"); + + bzero((gptr) field_count,sizeof(field_count)); + space_fields=fill_zero_fields=0; + + for (; trees-- ; huff_counts++) + { + if (huff_counts->field_type == FIELD_BLOB) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_VARCHAR) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_CHECK) + { + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + + huff_counts->field_type=FIELD_NORMAL; + huff_counts->pack_type=0; + + /* Check for zero-filled records (in this column), or zero records. */ + if (huff_counts->zero_fields || ! records) + { + my_off_t old_space_count; + /* + If there are only zero filled records (in this column), + or no records at all, we are done. + */ + if (huff_counts->zero_fields == records) + { + huff_counts->field_type= FIELD_ZERO; + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + /* Remeber the number of significant spaces. */ + old_space_count=huff_counts->counts[' ']; + /* Add all leading and trailing spaces. */ + huff_counts->counts[' ']+= (huff_counts->tot_end_space + + huff_counts->tot_pre_space + + huff_counts->empty_fields * + huff_counts->field_length); + /* Check, what the compressed length of this would be. */ + old_length=calc_packed_length(huff_counts,0)+records/8; + /* Get the number of zero bytes. */ + length=huff_counts->zero_fields*huff_counts->field_length; + /* Add it to the counts. */ + huff_counts->counts[0]+=length; + /* Check, what the compressed length of this would be. */ + new_length=calc_packed_length(huff_counts,0); + /* If the compression without the zeroes would be shorter, we are done. */ + if (old_length < new_length && huff_counts->field_length > 1) + { + huff_counts->field_type=FIELD_SKIP_ZERO; + huff_counts->counts[0]-=length; + huff_counts->bytes_packed=old_length- records/8; + goto found_pack; + } + /* Remove the insignificant spaces, but keep the zeroes. */ + huff_counts->counts[' ']=old_space_count; + } + /* Check, what the compressed length of this column would be. */ + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + + /* + If there are enough empty records (in this column), + treating them specially may pay off. + */ + if (huff_counts->empty_fields) + { + if (huff_counts->field_length > 2 && + huff_counts->empty_fields + (records - huff_counts->empty_fields)* + (1+max_bit(max(huff_counts->max_pre_space, + huff_counts->max_end_space))) < + records * max_bit(huff_counts->field_length)) + { + huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS; + } + else + { + length=huff_counts->empty_fields*huff_counts->field_length; + if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space) + { + huff_counts->tot_end_space+=length; + huff_counts->max_end_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->end_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + if (huff_counts->tot_pre_space) + { + huff_counts->tot_pre_space+=length; + huff_counts->max_pre_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->pre_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + } + } + + /* + If there are enough trailing spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_end_space) + { + huff_counts->counts[' ']+=huff_counts->tot_pre_space; + if (test_space_compress(huff_counts,records,huff_counts->max_end_space, + huff_counts->end_space, + huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE)) + goto found_pack; + huff_counts->counts[' ']-=huff_counts->tot_pre_space; + } + + /* + If there are enough leading spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_pre_space) + { + if (test_space_compress(huff_counts,records,huff_counts->max_pre_space, + huff_counts->pre_space, + huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE)) + goto found_pack; + } + + found_pack: /* Found field-packing */ + + /* Test if we can use zero-fill */ + + if (huff_counts->max_zero_fill && + (huff_counts->field_type == FIELD_NORMAL || + huff_counts->field_type == FIELD_SKIP_ZERO)) + { + huff_counts->counts[0]-=huff_counts->max_zero_fill* + (huff_counts->field_type == FIELD_SKIP_ZERO ? + records - huff_counts->zero_fields : records); + huff_counts->pack_type|=PACK_TYPE_ZERO_FILL; + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + } + + /* Test if intervall-field is better */ + + if (huff_counts->tree_buff) + { + HUFF_TREE tree; + + DBUG_EXECUTE_IF("forceintervall", + huff_counts->bytes_packed= ~ (my_off_t) 0;); + tree.element_buffer=0; + if (!make_huff_tree(&tree,huff_counts) && + tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed) + { + if (tree.elements == 1) + huff_counts->field_type=FIELD_CONSTANT; + else + huff_counts->field_type=FIELD_INTERVALL; + huff_counts->pack_type=0; + } + else + { + my_free((gptr) huff_counts->tree_buff,MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff=0; + } + if (tree.element_buffer) + my_free((gptr) tree.element_buffer,MYF(0)); + } + if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS) + space_fields++; + if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL) + fill_zero_fields++; + field_count[huff_counts->field_type]++; + } + DBUG_PRINT("info", ("normal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields)); + DBUG_PRINT("info", ("pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d", + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + if (verbose) + VOID(printf("\nnormal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d\n" + "pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d\n", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields, + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + DBUG_VOID_RETURN; +} + + /* Test if we can use space-compression and empty-field-compression */ + +static int +test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records, + uint max_space_length, my_off_t *space_counts, + my_off_t tot_space_count, enum en_fieldtype field_type) +{ + int min_pos; + uint length_bits,i; + my_off_t space_count,min_space_count,min_pack,new_length,skip; + + length_bits=max_bit(max_space_length); + + /* Default no end_space-packing */ + space_count=huff_counts->counts[(uint) ' ']; + min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count); + min_pack=calc_packed_length(huff_counts,0); + min_pos= -2; + huff_counts->counts[(uint) ' ']=space_count; + + /* Test with allways space-count */ + new_length=huff_counts->bytes_packed+length_bits*records/8; + if (new_length+1 < min_pack) + { + min_pos= -1; + min_pack=new_length; + min_space_count=space_count; + } + /* Test with length-flag */ + for (skip=0L, i=0 ; i < 8 ; i++) + { + if (space_counts[i]) + { + if (i) + huff_counts->counts[(uint) ' ']+=space_counts[i]; + skip+=huff_counts->pre_space[i]; + new_length=calc_packed_length(huff_counts,0)+ + (records+(records-skip)*(1+length_bits))/8; + if (new_length < min_pack) + { + min_pos=(int) i; + min_pack=new_length; + min_space_count=huff_counts->counts[(uint) ' ']; + } + } + } + + huff_counts->counts[(uint) ' ']=min_space_count; + huff_counts->bytes_packed=min_pack; + switch (min_pos) { + case -2: + return(0); /* No space-compress */ + case -1: /* Always space-count */ + huff_counts->field_type=field_type; + huff_counts->min_space=0; + huff_counts->length_bits=max_bit(max_space_length); + break; + default: + huff_counts->field_type=field_type; + huff_counts->min_space=(uint) min_pos; + huff_counts->pack_type|=PACK_TYPE_SELECTED; + huff_counts->length_bits=max_bit(max_space_length); + break; + } + return(1); /* Using space-compress */ +} + + + /* Make a huff_tree of each huff_count */ + +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint tree; + HUFF_TREE *huff_tree; + DBUG_ENTER("make_huff_trees"); + + if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(0); + + for (tree=0 ; tree < trees ; tree++) + { + if (make_huff_tree(huff_tree+tree,huff_counts+tree)) + { + while (tree--) + my_free((gptr) huff_tree[tree].element_buffer,MYF(0)); + my_free((gptr) huff_tree,MYF(0)); + DBUG_RETURN(0); + } + } + DBUG_RETURN(huff_tree); +} + +/* + Build a Huffman tree. + + SYNOPSIS + make_huff_tree() + huff_tree The Huffman tree. + huff_counts The counts. + + DESCRIPTION + Build a Huffman tree according to huff_counts->counts or + huff_counts->tree_buff. tree_buff, if non-NULL contains up to + tree_buff_length of distinct column values. In that case, whole + values can be Huffman encoded instead of single bytes. + + RETURN + 0 OK + != 0 Error +*/ + +static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT *a,*b,*new_huff_el; + + first=last=0; + if (huff_counts->tree_buff) + { + /* Calculate the number of distinct values in tree_buff. */ + found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / + huff_counts->field_length; + first=0; last=found-1; + } + else + { + /* Count the number of byte codes found in the column. */ + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + } + } + if (found < 2) + found=2; + } + + /* When using 'tree_buff' we can have more that 256 values. */ + if (queue.max_elements < found) + { + delete_queue(&queue); + if (init_queue(&queue,found,0,0,compare_huff_elements,0)) + return -1; + } + + /* Allocate or reallocate an element buffer for the Huffman tree. */ + if (!huff_tree->element_buffer) + { + if (!(huff_tree->element_buffer= + (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME)))) + return 1; + } + else + { + HUFF_ELEMENT *temp; + if (!(temp= + (HUFF_ELEMENT*) my_realloc((gptr) huff_tree->element_buffer, + found*2*sizeof(HUFF_ELEMENT), + MYF(MY_WME)))) + return 1; + huff_tree->element_buffer=temp; + } + + huff_counts->tree=huff_tree; + huff_tree->counts=huff_counts; + huff_tree->min_chr=first; + huff_tree->max_chr=last; + huff_tree->char_bits=max_bit(last-first); + huff_tree->offset_bits=max_bit(found-1)+1; + + if (huff_counts->tree_buff) + { + huff_tree->elements=0; + huff_tree->tree_pack_length=(1+15+16+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8 + + (uint) (huff_tree->counts->tree_pos- + huff_tree->counts->tree_buff); + /* + Put a HUFF_ELEMENT into the queue for every distinct column value. + + tree_walk() calls save_counts_in_queue() for every element in + 'int_tree'. This takes elements from the target trees element + buffer and places references to them into the buffer of the + priority queue. We insert in column value order, but the order is + in fact irrelevant here. We will establish the correct order + later. + */ + tree_walk(&huff_counts->int_tree, + (int (*)(void*, element_count,void*)) save_counts_in_queue, + (gptr) huff_tree, left_root_right); + } + else + { + huff_tree->elements=found; + huff_tree->tree_pack_length=(9+9+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8; + /* + Put a HUFF_ELEMENT into the queue for every byte code found in the column. + + The elements are taken from the target trees element buffer. + Instead of using queue_insert(), we just place references to the + elements into the buffer of the priority queue. We insert in byte + value order, but the order is in fact irrelevant here. We will + establish the correct order later. + */ + for (i=first, found=0 ; i <= last ; i++) + { + if (huff_counts->counts[i]) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=huff_counts->counts[i]; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr=i; + queue.root[found]=(byte*) new_huff_el; + } + } + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which builds the Huffman tree. + */ + while (found < 2) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=0; + new_huff_el->a.leaf.null=0; + if (last) + new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1; + else + new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1; + queue.root[found]=(byte*) new_huff_el; + } + } + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=found/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + bytes_packed=0; bits_packed=0; + for (i=1 ; i < found ; i++) + { + /* + Pop the top element from the queue (the one with the least incidence). + Popping from a priority queue includes a re-ordering of the queue, + to get the next least incidence element to the top. + */ + a=(HUFF_ELEMENT*) queue_remove(&queue,0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b=(HUFF_ELEMENT*) queue.root[1]; + /* Get a new element from the element buffer. */ + new_huff_el=huff_tree->element_buffer+found+i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count=a->count+b->count; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* The new element points to its children, lesser in left. */ + new_huff_el->a.nod.left=a; + new_huff_el->a.nod.right=b; + /* + Replace the copied top element by the new element and re-order the + queue. + */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + huff_tree->root=(HUFF_ELEMENT*) queue.root[1]; + huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8; + return 0; +} + +static int compare_tree(void* cmp_arg __attribute__((unused)), + register const uchar *s, register const uchar *t) +{ + uint length; + for (length=global_count->field_length; length-- ;) + if (*s++ != *t++) + return (int) s[-1] - (int) t[-1]; + return 0; +} + +/* + Organize distinct column values and their incidences into a priority queue. + + SYNOPSIS + save_counts_in_queue() + key The column value. + count The incidence of this value. + tree The Huffman tree to be built later. + + DESCRIPTION + We use the element buffer of the targeted tree. The distinct column + values are organized in a priority queue first. The Huffman + algorithm will later organize the elements into a Huffman tree. For + the time being, we just place references to the elements into the + queue buffer. The buffer will later be organized into a priority + queue. + + RETURN + 0 + */ + +static int save_counts_in_queue(byte *key, element_count count, + HUFF_TREE *tree) +{ + HUFF_ELEMENT *new_huff_el; + + new_huff_el=tree->element_buffer+(tree->elements++); + new_huff_el->count=count; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) / + tree->counts->field_length; + queue.root[tree->elements]=(byte*) new_huff_el; + return 0; +} + + +/* + Calculate length of file if given counts should be used. + + SYNOPSIS + calc_packed_length() + huff_counts The counts for a column of the table(s). + add_tree_lenght If the decode tree length should be added. + + DESCRIPTION + We need to follow the Huffman algorithm until we know, how many bits + are required for each byte code. But we do not need the resulting + Huffman tree. Hence, we can leave out some steps which are essential + in make_huff_tree(). + + RETURN + Number of bytes required to compress this table column. +*/ + +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, + uint add_tree_lenght) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT element_buffer[256]; + DBUG_ENTER("calc_packed_length"); + + /* + WARNING: We use a small hack for efficiency: Instead of placing + references to HUFF_ELEMENTs into the queue, we just insert + references to the counts of the byte codes which appeared in this + table column. During the Huffman algorithm they are successively + replaced by references to HUFF_ELEMENTs. This works, because + HUFF_ELEMENTs have the incidence count at their beginning. + Regardless, wether the queue array contains references to counts of + type my_off_t or references to HUFF_ELEMENTs which have the count of + type my_off_t at their beginning, it always points to a count of the + same type. + + Instead of using queue_insert(), we just copy the references into + the buffer of the priority queue. We insert in byte value order, but + the order is in fact irrelevant here. We will establish the correct + order later. + */ + first=last=0; + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + /* We start with root[1], which is the queues top element. */ + queue.root[found]=(byte*) &huff_counts->counts[i]; + } + } + if (!found) + DBUG_RETURN(0); /* Empty tree */ + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which follows the Huffman algorithm. + */ + if (found < 2) + queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1]; + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + bytes_packed=0; bits_packed=0; + /* Add the length of the coding table, which would become part of the file. */ + if (add_tree_lenght) + bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ + (max_bit(found-1)+1+1)*(found-2) +7)/8; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=(found+1)/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + for (i=0 ; i < found-1 ; i++) + { + my_off_t *a; + my_off_t *b; + HUFF_ELEMENT *new_huff_el; + + /* + Pop the top element from the queue (the one with the least + incidence). Popping from a priority queue includes a re-ordering + of the queue, to get the next least incidence element to the top. + */ + a= (my_off_t*) queue_remove(&queue, 0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b= (my_off_t*) queue.root[1]; + /* Create a new element in a local (automatic) buffer. */ + new_huff_el= element_buffer + i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count= *a + *b; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* + Replace the copied top element by the new element and re-order the + queue. This successively replaces the references to counts by + references to HUFF_ELEMENTs. + */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + DBUG_RETURN(bytes_packed+(bits_packed+7)/8); +} + + + /* Remove trees that don't give any compression */ + +static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint k,tree_number; + HUFF_COUNTS count,*i,*j,*last_count; + + last_count=huff_counts+trees; + for (tree_number=0, i=huff_counts ; i < last_count ; i++) + { + if (!i->tree->tree_number) + { + i->tree->tree_number= ++tree_number; + if (i->tree_buff) + continue; /* Don't join intervall */ + for (j=i+1 ; j < last_count ; j++) + { + if (! j->tree->tree_number && ! j->tree_buff) + { + for (k=0 ; k < 256 ; k++) + count.counts[k]=i->counts[k]+j->counts[k]; + if (calc_packed_length(&count,1) <= + i->tree->bytes_packed + j->tree->bytes_packed+ + i->tree->tree_pack_length+j->tree->tree_pack_length+ + ALLOWED_JOIN_DIFF) + { + memcpy_fixed((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + my_free((gptr) j->tree->element_buffer,MYF(0)); + j->tree->element_buffer=0; + j->tree=i->tree; + bmove((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + if (make_huff_tree(i->tree,i)) + return (uint) -1; + } + } + } + } + } + DBUG_PRINT("info", ("Original trees: %d After join: %d", + trees, tree_number)); + if (verbose) + VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); + return tree_number; /* Return trees left */ +} + + +/* + Fill in huff_tree encode tables. + + SYNOPSIS + make_huff_decode_table() + huff_tree An array of HUFF_TREE which are to be encoded. + trees The number of HUFF_TREE in the array. + + RETURN + 0 success + != 0 error +*/ + +static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) +{ + uint elements; + for ( ; trees-- ; huff_tree++) + { + if (huff_tree->tree_number > 0) + { + elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; + if (!(huff_tree->code = + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong) + sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) + return 1; + huff_tree->code_len=(uchar*) (huff_tree->code+elements); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), LL(0)); + } + } + return 0; +} + + +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint size, ulonglong code) +{ + uint chr; + if (!element->a.leaf.null) + { + chr=element->a.leaf.element_nr; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; + } + else + { + size--; + make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); + } + return; +} + + +/* + Convert a value into binary digits. + + SYNOPSIS + bindigits() + value The value. + length The number of low order bits to convert. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *bindigits(ulonglong value, uint bits) +{ + static char digits[72]; + char *ptr= digits; + uint idx= bits; + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + *(ptr++)= '0' + ((value >> (--idx)) & 1); + *ptr= '\0'; + return digits; +} + + +/* + Convert a value into hexadecimal digits. + + SYNOPSIS + hexdigits() + value The value. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *hexdigits(ulonglong value) +{ + static char digits[20]; + char *ptr= digits; + uint idx= 2 * sizeof(value); /* Two hex digits per byte. */ + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + { + if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9') + *(ptr - 1)+= 'a' - '9' - 1; + } + *ptr= '\0'; + return digits; +} + + + /* Write header to new packed data file */ + +static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, + my_off_t tot_elements,my_off_t filelength) +{ + byte *buff= (byte*) file_buffer.pos; + + bzero(buff,HEAD_LENGTH); + memcpy_fixed(buff,maria_pack_file_magic,4); + int4store(buff+4,head_length); + int4store(buff+8, mrg->min_pack_length); + int4store(buff+12,mrg->max_pack_length); + int4store(buff+16,tot_elements); + int4store(buff+20,intervall_length); + int2store(buff+24,trees); + buff[26]=(char) mrg->ref_length; + /* Save record pointer length */ + buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2); + if (test_only) + return 0; + VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0))); + return my_write(file_buffer.file,(const byte *) file_buffer.pos,HEAD_LENGTH, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; +} + + /* Write fieldinfo to new packed file */ + +static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) +{ + reg1 uint i; + uint huff_tree_bits; + huff_tree_bits=max_bit(trees ? trees-1 : 0); + + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("column types:")); + DBUG_PRINT("info", ("FIELD_NORMAL 0")); + DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); + DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2")); + DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3")); + DBUG_PRINT("info", ("FIELD_BLOB 4")); + DBUG_PRINT("info", ("FIELD_CONSTANT 5")); + DBUG_PRINT("info", ("FIELD_INTERVALL 6")); + DBUG_PRINT("info", ("FIELD_ZERO 7")); + DBUG_PRINT("info", ("FIELD_VARCHAR 8")); + DBUG_PRINT("info", ("FIELD_CHECK 9")); + DBUG_PRINT("info", ("")); + DBUG_PRINT("info", ("pack type as a set of flags:")); + DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); + DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); + DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); + DBUG_PRINT("info", ("")); + if (verbose >= 2) + { + VOID(printf("\n")); + VOID(printf("column types:\n")); + VOID(printf("FIELD_NORMAL 0\n")); + VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); + VOID(printf("FIELD_SKIP_PRESPACE 2\n")); + VOID(printf("FIELD_SKIP_ZERO 3\n")); + VOID(printf("FIELD_BLOB 4\n")); + VOID(printf("FIELD_CONSTANT 5\n")); + VOID(printf("FIELD_INTERVALL 6\n")); + VOID(printf("FIELD_ZERO 7\n")); + VOID(printf("FIELD_VARCHAR 8\n")); + VOID(printf("FIELD_CHECK 9\n")); + VOID(printf("\n")); + VOID(printf("pack type as a set of flags:\n")); + VOID(printf("PACK_TYPE_SELECTED 1\n")); + VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); + VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); + VOID(printf("\n")); + } + for (i=0 ; i++ < fields ; counts++) + { + write_bits((ulonglong) (int) counts->field_type, 5); + write_bits(counts->pack_type,6); + if (counts->pack_type & PACK_TYPE_ZERO_FILL) + write_bits(counts->max_zero_fill,5); + else + write_bits(counts->length_bits,5); + write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits); + DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + i , counts->field_type, counts->pack_type, + counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + if (verbose >= 2) + VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " + "tree: %2u length: %4u\n", i , counts->field_type, + counts->pack_type, counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + } + flush_bits(); + return; +} + + /* Write all huff_trees to new datafile. Return tot count of + elements in all trees + Returns 0 on error */ + +static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) +{ + uint i,int_length; + uint tree_no; + uint codes; + uint errors= 0; + uint *packed_tree,*offset,length; + my_off_t elements; + + /* Find the highest number of elements in the trees. */ + for (i=length=0 ; i < trees ; i++) + if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) + length=huff_tree[i].elements; + /* + Allocate a buffer for packing a decode tree. Two numbers per element + (left child and right child). + */ + if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) + { + my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); + return 0; + } + + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); + tree_no= 0; + intervall_length=0; + for (elements=0; trees-- ; huff_tree++) + { + /* Skip columns that have been joined with other columns. */ + if (huff_tree->tree_number == 0) + continue; /* Deleted tree */ + tree_no++; + DBUG_PRINT("info", ("")); + if (verbose >= 3) + VOID(printf("\n")); + /* Count the total number of elements (byte codes or column values). */ + elements+=huff_tree->elements; + huff_tree->max_offset=2; + /* Build a tree of offsets and codes for decoding in 'packed_tree'. */ + if (huff_tree->elements <= 1) + offset=packed_tree; + else + offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); + + /* This should be the same as 'length' above. */ + huff_tree->offset_bits=max_bit(huff_tree->max_offset); + + /* + Since we check this during collecting the distinct column values, + this should never happen. + */ + if (huff_tree->max_offset >= IS_OFFSET) + { /* This should be impossible */ + VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", + huff_tree->max_offset)); + my_afree((gptr) packed_tree); + return 0; + } + + DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu " + "char_bits: %u\n", + (ulong) (file_buffer.pos - file_buffer.buffer), + huff_tree->elements, (ulong) (offset - packed_tree), + huff_tree->char_bits)); + if (!huff_tree->counts->tree_buff) + { + /* We do a byte compression on this column. Mark with bit 0. */ + write_bits(0,1); + write_bits(huff_tree->min_chr,8); + write_bits(huff_tree->elements,9); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + int_length=0; + } + else + { + int_length=(uint) (huff_tree->counts->tree_pos - + huff_tree->counts->tree_buff); + /* We have distinct column values for this column. Mark with bit 1. */ + write_bits(1,1); + write_bits(huff_tree->elements,15); + write_bits(int_length,16); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + intervall_length+=int_length; + } + DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u " + "offset_bits: %2u %s: %5u codelen: %2u", + tree_no, huff_tree->elements, huff_tree->char_bits, + huff_tree->offset_bits, huff_tree->counts->tree_buff ? + "bufflen" : "min_chr", huff_tree->counts->tree_buff ? + int_length : huff_tree->min_chr, huff_tree->height)); + if (verbose >= 2) + VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " + "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, + huff_tree->char_bits, huff_tree->offset_bits, + huff_tree->counts->tree_buff ? "bufflen" : "min_chr", + huff_tree->counts->tree_buff ? int_length : + huff_tree->min_chr, huff_tree->height)); + + /* Check that the code tree length matches the element count. */ + length=(uint) (offset-packed_tree); + if (length != huff_tree->elements*2-2) + { + VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", + length, huff_tree->elements * 2 - 2)); + errors++; + break; + } + + for (i=0 ; i < length ; i++) + { + if (packed_tree[i] & IS_OFFSET) + write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits), + huff_tree->offset_bits+1); + else + write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); + DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x", + i, (packed_tree[i] & IS_OFFSET) ? + " -> " : "", (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + if (verbose >= 3) + VOID(printf("tree[0x%04x]: %s0x%04x\n", + i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", + (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + } + flush_bits(); + + /* + Display coding tables and check their correctness. + */ + codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; + for (i= 0; i < codes; i++) + { + ulonglong code; + uint bits; + uint len; + uint idx; + + if (! (len= huff_tree->code_len[i])) + continue; + DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], + huff_tree->code_len[i]))); + if (verbose >= 3) + VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], huff_tree->code_len[i]))); + + /* Check that the encode table decodes correctly. */ + code= 0; + bits= 0; + idx= 0; + DBUG_EXECUTE_IF("forcechkerr1", len--;); + DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code);); + DBUG_EXECUTE_IF("forcechkerr3", idx= length;); + for (;;) + { + if (! len) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", + hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); + errors++; + break; + } + code<<= 1; + code|= (huff_tree->code[i] >> (--len)) & 1; + bits++; + if (bits > 8 * sizeof(code)) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, 8 * sizeof(code))); + errors++; + break; + } + idx+= code & 1; + if (idx >= length) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", + idx, length)); + errors++; + break; + } + if (packed_tree[idx] & IS_OFFSET) + idx+= packed_tree[idx] & ~IS_OFFSET; + else + break; /* Hit a leaf. This contains the result value. */ + } + if (errors) + break; + + DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;); + if (packed_tree[idx] != i) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", + packed_tree[idx], i)); + errors++; + break; + } + } /*end for (codes)*/ + if (errors) + break; + + /* Write column values in case of distinct column value compression. */ + if (huff_tree->counts->tree_buff) + { + for (i=0 ; i < int_length ; i++) + { + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8); + DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x", + i, (uchar) huff_tree->counts->tree_buff[i])); + if (verbose >= 3) + VOID(printf("column_values[0x%04x]: 0x%02x\n", + i, (uchar) huff_tree->counts->tree_buff[i])); + } + } + flush_bits(); + } + DBUG_PRINT("info", ("")); + if (verbose >= 2) + VOID(printf("\n")); + my_afree((gptr) packed_tree); + if (errors) + { + VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); + return 0; + } + return elements; +} + + +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, + uint *offset) +{ + uint *prev_offset; + + prev_offset= offset; + /* + 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null, + then there is no left child and, hence no right child either. This + is a property of a binary tree. An element is either a node with two + childs, or a leaf without childs. + + The current element is always a node with two childs. Go left first. + */ + if (!element->a.nod.left->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; + offset+=2; + } + else + { + /* + Recursively traverse the tree to the left. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + prev_offset[0]= IS_OFFSET+2; + offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); + } + + /* Now, check the right child. */ + if (!element->a.nod.right->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[1]=element->a.nod.right->a.leaf.element_nr; + return offset; + } + else + { + /* + Recursively traverse the tree to the right. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + uint temp=(uint) (offset-prev_offset-1); + prev_offset[1]= IS_OFFSET+ temp; + if (huff_tree->max_offset < temp) + huff_tree->max_offset = temp; + return make_offset_code_tree(huff_tree,element->a.nod.right,offset); + } +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) +{ + int error; + uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length, + intervall,field_length,max_pack_length,pack_blob_length; + my_off_t record_count; + char llbuf[32]; + ulong length,pack_length; + byte *record,*pos,*end_pos,*record_pos,*start_pos; + HUFF_COUNTS *count,*end_count; + HUFF_TREE *tree; + MARIA_HA *isam_file=mrg->file[0]; + uint pack_version= (uint) isam_file->s->pack.version; + DBUG_ENTER("compress_isam_file"); + + /* Allocate a buffer for the records (excluding blobs). */ + if (!(record=(byte*) my_alloca(isam_file->s->base.reclength))) + return -1; + + end_count=huff_counts+isam_file->s->base.fields; + min_record_length= (uint) ~0; + max_record_length=0; + + /* + Calculate the maximum number of bits required to pack the records. + Remember to understand 'max_zero_fill' as 'min_zero_fill'. + The tree height determines the maximum number of bits per value. + Some fields skip leading or trailing spaces or zeroes. The skipped + number of bytes is encoded by 'length_bits' bits. + Empty blobs and varchar are encoded with a single 1 bit. Other blobs + and varchar get a leading 0 bit. + */ + for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++) + { + if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) + huff_counts[i].max_zero_fill=0; + if (huff_counts[i].field_type == FIELD_CONSTANT || + huff_counts[i].field_type == FIELD_ZERO || + huff_counts[i].field_type == FIELD_CHECK) + continue; + if (huff_counts[i].field_type == FIELD_INTERVALL) + max_calc_length+=huff_counts[i].tree->height; + else if (huff_counts[i].field_type == FIELD_BLOB || + huff_counts[i].field_type == FIELD_VARCHAR) + max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1; + else + max_calc_length+= + (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* + huff_counts[i].tree->height+huff_counts[i].length_bits; + } + max_calc_length= (max_calc_length + 7) / 8; + pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length); + record_count=0; + /* 'max_blob_length' is the max length of all blobs of a record. */ + pack_blob_length= isam_file->s->base.blobs ? + _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0; + max_pack_length=pack_ref_length+pack_blob_length; + + DBUG_PRINT("fields", ("===")); + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length)) + break; + record_pos= (byte*) file_buffer.pos; + file_buffer.pos+=max_pack_length; + for (start_pos=record, count= huff_counts; count < end_count ; count++) + { + end_pos=start_pos+(field_length=count->field_length); + tree=count->tree; + + DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + (ulong) (count - huff_counts + 1), + count->field_type, + count->pack_type, count->max_zero_fill, + count->length_bits, count->tree->tree_number, + count->field_length)); + + /* Check if the column contains spaces only. */ + if (count->pack_type & PACK_TYPE_SPACE_FIELDS) + { + for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; + if (pos == end_pos) + { + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1")); + DBUG_PRINT("fields", ("---")); + write_bits(1,1); + start_pos=end_pos; + continue; + } + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1")); + write_bits(0,1); + } + end_pos-=count->max_zero_fill; + field_length-=count->max_zero_fill; + + switch(count->field_type) { + case FIELD_SKIP_ZERO: + if (!memcmp((byte*) start_pos,zero_string,field_length)) + { + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1")); + write_bits(1,1); + start_pos=end_pos; + break; + } + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1")); + write_bits(0,1); + /* Fall through */ + case FIELD_NORMAL: + DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes", + (ulong) (end_pos - start_pos))); + for ( ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_SKIP_ENDSPACE: + for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; + length= (ulong) (end_pos - pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE not more than min_space, " + "bits: 1")); + write_bits(0,1); + pos=end_pos; + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes", + (ulong) (pos - start_pos))); + for ( ; start_pos < pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + start_pos=end_pos; + break; + case FIELD_SKIP_PRESPACE: + for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; + length= (ulong) (pos - start_pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE not more than min_space, " + "bits: 1")); + pos=start_pos; + write_bits(0,1); + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes", + (ulong) (end_pos - start_pos))); + for (start_pos=pos ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_CONSTANT: + case FIELD_ZERO: + case FIELD_CHECK: + DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK")); + start_pos=end_pos; + break; + case FIELD_INTERVALL: + global_count=count; + pos=(byte*) tree_search(&count->int_tree, start_pos, + count->int_tree.custom_arg); + intervall=(uint) (pos - count->tree_buff)/field_length; + DBUG_PRINT("fields", ("FIELD_INTERVALL")); + DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u", + intervall, hexdigits(tree->code[intervall]), + (uint) tree->code_len[intervall])); + write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); + start_pos=end_pos; + break; + case FIELD_BLOB: + { + ulong blob_length= _ma_calc_blob_length(field_length- + maria_portable_sizeof_char_ptr, + start_pos); + /* Empty blobs are encoded with a single 1 bit. */ + if (!blob_length) + { + DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1")); + write_bits(1,1); + } + else + { + byte *blob,*blob_end; + DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1")); + write_bits(0,1); + /* Write the blob length. */ + DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", + blob_length, count->length_bits)); + write_bits(blob_length,count->length_bits); + memcpy_fixed(&blob,end_pos-maria_portable_sizeof_char_ptr, + sizeof(char*)); + blob_end=blob+blob_length; + /* Encode the blob bytes. */ + for ( ; blob < blob_end ; blob++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *blob, hexdigits(tree->code[(uchar) *blob]), + (uint) tree->code_len[(uchar) *blob], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *blob], + (uint) tree->code_len[(uchar) *blob]); + } + tot_blob_length+=blob_length; + } + start_pos= end_pos; + break; + } + case FIELD_VARCHAR: + { + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + /* Empty varchar are encoded with a single 1 bit. */ + if (!col_length) + { + DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1")); + write_bits(1,1); /* Empty varchar */ + } + else + { + byte *end=start_pos+pack_length+col_length; + DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1")); + write_bits(0,1); + /* Write the varchar length. */ + DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u", + col_length, count->length_bits)); + write_bits(col_length,count->length_bits); + /* Encode the varchar bytes. */ + for (start_pos+=pack_length ; start_pos < end ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + } + start_pos= end_pos; + break; + } + case FIELD_LAST: + case FIELD_enum_val_count: + abort(); /* Impossible */ + } + start_pos+=count->max_zero_fill; + DBUG_PRINT("fields", ("---")); + } + flush_bits(); + length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; + pack_length= _ma_save_pack_length(pack_version, record_pos, length); + if (pack_blob_length) + pack_length+= _ma_save_pack_length(pack_version, record_pos + pack_length, + tot_blob_length); + DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " + "length-bytes: %lu", (ulong) record_count, length, + tot_blob_length, pack_length)); + DBUG_PRINT("fields", ("===")); + + /* Correct file buffer if the header was smaller */ + if (pack_length != max_pack_length) + { + bmove(record_pos+pack_length,record_pos+max_pack_length,length); + file_buffer.pos-= (max_pack_length-pack_length); + } + if (length < (ulong) min_record_length) + min_record_length=(uint) length; + if (length > (ulong) max_record_length) + max_record_length=(uint) length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + break; + } + if (error == HA_ERR_END_OF_FILE) + error=0; + else + { + VOID(fprintf(stderr, "%s: Got error %d reading records\n", + my_progname, error)); + } + if (verbose >= 2) + VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf))); + + my_afree((gptr) record); + mrg->ref_length=max_pack_length; + mrg->min_pack_length=max_record_length ? min_record_length : 0; + mrg->max_pack_length=max_record_length; + DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0)); +} + + +static char *make_new_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4); +} + +static char *make_old_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",OLD_EXT,2+4); +} + + /* rutines for bit writing buffer */ + +static void init_file_buffer(File file, pbool read_buffer) +{ + file_buffer.file=file; + file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE), + MYF(MY_WME)); + file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8; + file_buffer.pos_in_file=0; + error_on_write=0; + if (read_buffer) + { + + file_buffer.pos=file_buffer.end; + file_buffer.bits=0; + } + else + { + file_buffer.pos=file_buffer.buffer; + file_buffer.bits=BITS_SAVED; + } + file_buffer.bitbucket= 0; +} + + +static int flush_buffer(ulong neaded_length) +{ + ulong length; + + /* + file_buffer.end is 8 bytes lower than the real end of the buffer. + This is done so that the end-of-buffer condition does not need to be + checked for every byte (see write_bits()). Consequently, + file_buffer.pos can become greater than file_buffer.end. The + algorithms in the other functions ensure that there will never be + more than 8 bytes written to the buffer without an end-of-buffer + check. So the buffer cannot be overrun. But we need to check for the + near-to-buffer-end condition to avoid a negative result, which is + casted to unsigned and thus becomes giant. + */ + if ((file_buffer.pos < file_buffer.end) && + ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length)) + return 0; + length=(ulong) (file_buffer.pos-file_buffer.buffer); + file_buffer.pos=file_buffer.buffer; + file_buffer.pos_in_file+=length; + if (test_only) + return 0; + if (error_on_write|| my_write(file_buffer.file, + (const byte*) file_buffer.buffer, + length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + error_on_write=1; + return 1; + } + + if (neaded_length != ~(ulong) 0 && + (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length) + { + char *tmp; + neaded_length+=256; /* some margin */ + tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME)); + if (!tmp) + return 1; + file_buffer.pos= ((uchar*) tmp + + (ulong) (file_buffer.pos - file_buffer.buffer)); + file_buffer.buffer= (uchar*) tmp; + file_buffer.end= (uchar*) (tmp+neaded_length-8); + } + return 0; +} + + +static void end_file_buffer(void) +{ + my_free((gptr) file_buffer.buffer,MYF(0)); +} + + /* output `bits` low bits of `value' */ + +static void write_bits(register ulonglong value, register uint bits) +{ + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + + if ((file_buffer.bits-= (int) bits) >= 0) + { + file_buffer.bitbucket|= value << file_buffer.bits; + } + else + { + reg3 ulonglong bit_buffer; + bits= (uint) -file_buffer.bits; + bit_buffer= (file_buffer.bitbucket | + ((bits != 8 * sizeof(value)) ? (value >> bits) : 0)); +#if BITS_SAVED == 64 + *file_buffer.pos++= (uchar) (bit_buffer >> 56); + *file_buffer.pos++= (uchar) (bit_buffer >> 48); + *file_buffer.pos++= (uchar) (bit_buffer >> 40); + *file_buffer.pos++= (uchar) (bit_buffer >> 32); +#endif + *file_buffer.pos++= (uchar) (bit_buffer >> 24); + *file_buffer.pos++= (uchar) (bit_buffer >> 16); + *file_buffer.pos++= (uchar) (bit_buffer >> 8); + *file_buffer.pos++= (uchar) (bit_buffer); + + if (bits != 8 * sizeof(value)) + value&= (((ulonglong) 1) << bits) - 1; + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); + file_buffer.bits=(int) (BITS_SAVED - bits); + file_buffer.bitbucket= value << (BITS_SAVED - bits); + } + return; +} + + /* Flush bits in bit_buffer to buffer */ + +static void flush_bits(void) +{ + int bits; + ulonglong bit_buffer; + + bits= file_buffer.bits & ~7; + bit_buffer= file_buffer.bitbucket >> bits; + bits= BITS_SAVED - bits; + while (bits > 0) + { + bits-= 8; + *file_buffer.pos++= (uchar) (bit_buffer >> bits); + } + file_buffer.bits= BITS_SAVED; + file_buffer.bitbucket= 0; +} + + +/**************************************************************************** +** functions to handle the joined files +****************************************************************************/ + +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc) +{ + MARIA_SHARE *share=isam_file->s; + uint options=mi_uint2korr(share->state.header.options); + uint key; + DBUG_ENTER("save_state"); + + options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA; + mi_int2store(share->state.header.options,options); + + share->state.state.data_file_length=new_length; + share->state.state.del=0; + share->state.state.empty=0; + share->state.dellink= HA_OFFSET_ERROR; + share->state.split=(ha_rows) mrg->records; + share->state.version=(ulong) time((time_t*) 0); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + /* + Some indexes are disabled, cannot use current key_file_length value + as an estimate of upper bound of index file size. Use packed data file + size instead. + */ + share->state.state.key_file_length= new_length; + } + /* + If there are no disabled indexes, keep key_file_length value from + original file so "mariachk -rq" can use this value (this is necessary + because index size cannot be easily calculated for fulltext keys) + */ + maria_clear_all_keys_active(share->state.key_map); + for (key=0 ; key < share->base.keys ; key++) + share->state.key_root[key]= HA_OFFSET_ERROR; + for (key=0 ; key < share->state.header.max_block_size ; key++) + share->state.key_del[key]= HA_OFFSET_ERROR; + isam_file->state->checksum=crc; /* Save crc here */ + share->changed=1; /* Force write of header */ + share->state.open_count=0; + share->global_changed=0; + VOID(my_chsize(share->kfile, share->base.keystart, 0, MYF(0))); + if (share->base.keys) + isamchk_neaded=1; + DBUG_RETURN(_ma_state_info_write(share->kfile,&share->state,1+2)); +} + + +static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc) +{ + MARIA_STATE_INFO state; + MARIA_HA *isam_file=mrg->file[0]; + uint options; + DBUG_ENTER("save_state_mrg"); + + state= isam_file->s->state; + options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD | + HA_OPTION_READ_ONLY_DATA); + mi_int2store(state.header.options,options); + state.state.data_file_length=new_length; + state.state.del=0; + state.state.empty=0; + state.state.records=state.split=(ha_rows) mrg->records; + /* See comment above in save_state about key_file_length handling. */ + if (mrg->src_file_has_indexes_disabled) + { + isam_file->s->state.state.key_file_length= + max(isam_file->s->state.state.key_file_length, new_length); + } + state.dellink= HA_OFFSET_ERROR; + state.version=(ulong) time((time_t*) 0); + maria_clear_all_keys_active(state.key_map); + state.state.checksum=crc; + if (isam_file->s->base.keys) + isamchk_neaded=1; + state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */ + DBUG_RETURN (_ma_state_info_write(file,&state,1+2)); +} + + +/* reset for mrg_rrnd */ + +static void mrg_reset(PACK_MRG_INFO *mrg) +{ + if (mrg->current) + { + maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0); + mrg->current=0; + } +} + +static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) +{ + int error; + MARIA_HA *isam_info; + my_off_t filepos; + + if (!info->current) + { + isam_info= *(info->current=info->file); + info->end=info->current+info->count; + maria_extra(isam_info, HA_EXTRA_RESET, 0); + maria_extra(isam_info, HA_EXTRA_CACHE, 0); + filepos=isam_info->s->pack.header_length; + } + else + { + isam_info= *info->current; + filepos= isam_info->nextpos; + } + + for (;;) + { + isam_info->update&= HA_STATE_CHANGED; + if (!(error=(*isam_info->s->read_rnd)(isam_info,(byte*) buf, + filepos, 1)) || + error != HA_ERR_END_OF_FILE) + return (error); + maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0); + if (info->current+1 == info->end) + return(HA_ERR_END_OF_FILE); + info->current++; + isam_info= *info->current; + filepos=isam_info->s->pack.header_length; + maria_extra(isam_info,HA_EXTRA_RESET, 0); + maria_extra(isam_info,HA_EXTRA_CACHE, 0); + } +} + + +static int mrg_close(PACK_MRG_INFO *mrg) +{ + uint i; + int error=0; + for (i=0 ; i < mrg->count ; i++) + error|=maria_close(mrg->file[i]); + if (mrg->free_file) + my_free((gptr) mrg->file,MYF(0)); + return error; +} + + +#if !defined(DBUG_OFF) +/* + Fake the counts to get big Huffman codes. + + SYNOPSIS + fakebigcodes() + huff_counts A pointer to the counts array. + end_count A pointer past the counts array. + + DESCRIPTION + + Huffman coding works by removing the two least frequent values from + the list of values and add a new value with the sum of their + incidences in a loop until only one value is left. Every time a + value is reused for a new value, it gets one more bit for its + encoding. Hence, the least frequent values get the longest codes. + + To get a maximum code length for a value, two of the values must + have an incidence of 1. As their sum is 2, the next infrequent value + must have at least an incidence of 2, then 4, 8, 16 and so on. This + means that one needs 2**n bytes (values) for a code length of n + bits. However, using more distinct values forces the use of longer + codes, or reaching the code length with less total bytes (values). + + To get 64(32)-bit codes, I sort the counts by decreasing incidence. + I assign counts of 1 to the two most frequent values, a count of 2 + for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All + the remaining values get 1. That way every possible byte has an + assigned code, though not all codes are used if not all byte values + are present in the column. + + This strategy would work with distinct column values too, but + requires that at least 64(32) values are present. To make things + easier here, I cancel all distinct column values and force byte + compression for all columns. + + RETURN + void +*/ + +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count) +{ + HUFF_COUNTS *count; + my_off_t *cur_count_p; + my_off_t *end_count_p; + my_off_t **cur_sort_p; + my_off_t **end_sort_p; + my_off_t *sort_counts[256]; + my_off_t total; + DBUG_ENTER("fakebigcodes"); + + for (count= huff_counts; count < end_count; count++) + { + /* + Remove distinct column values. + */ + if (huff_counts->tree_buff) + { + my_free((gptr) huff_counts->tree_buff, MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff= NULL; + DBUG_PRINT("fakebigcodes", ("freed distinct column values")); + } + + /* + Sort counts by decreasing incidence. + */ + cur_count_p= count->counts; + end_count_p= cur_count_p + 256; + cur_sort_p= sort_counts; + while (cur_count_p < end_count_p) + *(cur_sort_p++)= cur_count_p++; + (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp); + + /* + Assign faked counts. + */ + cur_sort_p= sort_counts; +#if SIZEOF_LONG_LONG > 4 + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1; +#else + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2; +#endif + /* Most frequent value gets a faked count of 1. */ + **(cur_sort_p++)= 1; + total= 1; + while (cur_sort_p < end_sort_p) + { + **(cur_sort_p++)= total; + total<<= 1; + } + /* Set the last value. */ + **(cur_sort_p++)= --total; + /* + Set the remaining counts. + */ + end_sort_p= sort_counts + 256; + while (cur_sort_p < end_sort_p) + **(cur_sort_p++)= 1; + } + DBUG_VOID_RETURN; +} + + +/* + Compare two counts for reverse sorting. + + SYNOPSIS + fakecmp() + count1 One count. + count2 Another count. + + RETURN + 1 count1 < count2 + 0 count1 == count2 + -1 count1 > count2 +*/ + +static int fakecmp(my_off_t **count1, my_off_t **count2) +{ + return ((**count1 < **count2) ? 1 : + (**count1 > **count2) ? -1 : 0); +} +#endif diff --git a/storage/maria/maria_rename.sh b/storage/maria/maria_rename.sh new file mode 100755 index 00000000000..fb20e47e635 --- /dev/null +++ b/storage/maria/maria_rename.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +replace myisam maria MYISAM MARIA MyISAM MARIA -- mysql-test/t/*maria*test mysql-test/r/*maria*result + +FILES=`echo sql/ha_maria.{cc,h} include/maria*h storage/maria/*.{c,h}` + +replace myisam maria MYISAM MARIA MyISAM MARIA myisam.h maria.h myisamdef.h maria_def.h mi_ maria_ ft_ maria_ft_ "Copyright (C) 2000" "Copyright (C) 2006" MI_ISAMINFO MARIA_INFO MI_CREATE_INFO MARIA_CREATE_INFO maria_isam_ maria_ MI_INFO MARIA_HA MI_ MARIA_ MARIACHK MARIA_CHK rt_index.h ma_rt_index.h rtree_ maria_rtree rt_key.h ma_rt_key.h rt_mbr.h ma_rt_mbr.h -- $FILES + +replace check_table_is_closed _ma_check_table_is_closed test_if_reopen _ma_test_if_reopen my_n_base_info_read maria_n_base_info_read update_auto_increment _ma_update_auto_increment save_pack_length _ma_save_packlength calc_pack_length _ma_calc_pack_length -- $FILES + +replace mi_ ma_ ft_ ma_ft_ rt_ ma_rt_ myisam maria myisamchk maria_chk myisampack maria_pack myisamlog maria_log -- storage/maria/Makefile.am + +# +# Restore wrong replaces +# + +replace maria_sint1korr mi_sint1korr maria_uint1korr mi_uint1korr maria_sint2korr mi_sint2korr maria_sint3korr mi_sint3korr maria_sint4korr mi_sint4korr maria_sint8korr mi_sint8korr maria_uint2korr mi_uint2korr maria_uint3korr mi_uint3korr maria_uint4korr mi_uint4korr maria_uint5korr mi_uint5korr maria_uint6korr mi_uint6korr maria_uint7korr mi_uint7korr maria_uint8korr mi_uint8korr maria_int1store mi_int1store maria_int2store mi_int2store maria_int3store mi_int3store maria_int4store mi_int4store maria_int5store mi_int5store maria_int6store mi_int6store maria_int7store mi_int7store maria_int8store mi_int8store maria_float4store mi_float4store maria_float4get mi_float4get maria_float8store mi_float8store maria_float8get mi_float8get maria_rowstore mi_rowstore maria_rowkorr mi_rowkorr maria_sizestore mi_sizestore maria_sizekorr mi_sizekorr _maria_maria_ _maria MARIA_MAX_POSSIBLE_KEY HA_MAX_POSSIBLE_KEY MARIA_MAX_KEY_BUFF HA_MAX_KEY_BUFF MARIA_MAX_KEY_SEG HA_MAX_KEY_SEG maria_ft_sintXkorr ft_sintXkorr maria_ft_intXstore ft_intXstore maria_ft_boolean_syntax ft_boolean_syntax maria_ft_min_word_len ft_min_word_len maria_ft_max_word_len ft_max_word_len -- $FILES diff --git a/storage/maria/test_pack b/storage/maria/test_pack new file mode 100755 index 00000000000..689645b1661 --- /dev/null +++ b/storage/maria/test_pack @@ -0,0 +1,10 @@ +silent="-s" +suffix="" + +ma_test1$suffix -s ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -S ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ;maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -b ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -w ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -ros test1 ; maria_chk$suffix -es test1 + +ma_test2$suffix -s -t4 ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 +ma_test2$suffix -s -t4 -b ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index 3c6a5c22234..c67db7c32fb 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -51,7 +51,8 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_delete_table.c mi_rename.c mi_check.c \ mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ - ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \ + ft_update.c ft_boolean_search.c ft_nlq_search.c \ + ft_myisam.c sort.c \ rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? DEFS = -DMAP_TO_USE_RAID diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c index 4204f211f2e..a4c39f06105 100644 --- a/storage/myisam/ft_boolean_search.c +++ b/storage/myisam/ft_boolean_search.c @@ -150,7 +150,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ - int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, (uchar*) (*a)->word+1,(*a)->len-1,0,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); @@ -183,7 +183,7 @@ static int ftb_query_add_word(void *param, char *word, int word_len, case FT_TOKEN_WORD: ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_WORD) + - (info->trunc ? MI_MAX_KEY_BUFF : + (info->trunc ? HA_MAX_KEY_BUFF : word_len * ftb_param->ftb->charset->mbmaxlen + HA_FT_WLEN + ftb_param->ftb->info->s->rec_reflength)); @@ -332,7 +332,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) byte *lastkey_buf=ftbw->word+ftbw->off; LINT_INIT(off); - LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) lastkey_buf+=ftbw->len; @@ -376,7 +375,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) if (!r && !ftbw->off) { - r= mi_compare_text(ftb->charset, + r= ha_compare_text(ftb->charset, info->lastkey+1, info->lastkey_length-extra-1, (uchar*) ftbw->word+1, @@ -599,8 +598,9 @@ static int ftb_phrase_add_word(void *param, char *word, int word_len, { FT_WORD *phrase_word= (FT_WORD *)phrase->data; FT_WORD *document_word= (FT_WORD *)document->data; - if (my_strnncoll(phrase_param->cs, phrase_word->pos, phrase_word->len, - document_word->pos, document_word->len)) + if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos, + phrase_word->len, + (uchar*) document_word->pos, document_word->len)) return 0; } phrase_param->match++; @@ -829,7 +829,7 @@ static int ftb_find_relevance_add_word(void *param, char *word, int len, for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word+1, ftbw->len-1, (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) b= c; @@ -839,7 +839,7 @@ static int ftb_find_relevance_add_word(void *param, char *word, int len, for (; c >= 0; c--) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word + 1,ftbw->len - 1, (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) break; diff --git a/storage/myisam/ft_myisam.c b/storage/myisam/ft_myisam.c new file mode 100644 index 00000000000..76c04ba4c0b --- /dev/null +++ b/storage/myisam/ft_myisam.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and myisam +*/ + +#include "ftdefs.h" + +FT_INFO *ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); + else + res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, + record); + return res; +} diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c index b4468d8bd95..3e79366a81e 100644 --- a/storage/myisam/ft_nlq_search.c +++ b/storage/myisam/ft_nlq_search.c @@ -104,7 +104,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) { if (keylen && - mi_compare_text(aio->charset,info->lastkey+1, + ha_compare_text(aio->charset,info->lastkey+1, info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) break; diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c index f333a661ea9..713ae8e8aab 100644 --- a/storage/myisam/ft_parser.c +++ b/storage/myisam/ft_parser.c @@ -34,7 +34,7 @@ typedef struct st_my_ft_parser_param static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { - return mi_compare_text(cs, (uchar*) w1->pos, w1->len, + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0, 0); } diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c index 6cfb0d59e62..19f65b09ae8 100644 --- a/storage/myisam/ft_static.c +++ b/storage/myisam/ft_static.c @@ -56,19 +56,6 @@ const struct _ft_vft _ft_vft_boolean = { }; -FT_INFO *ft_init_search(uint flags, void *info, uint keynr, - byte *query, uint query_len, CHARSET_INFO *cs, - byte *record) -{ - FT_INFO *res; - if (flags & FT_BOOL) - res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); - else - res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, - record); - return res; -} - const char *ft_stopword_file = 0; const char *ft_precompiled_stopwords[] = { diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index 3b5a1752ff7..8da32474690 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -30,7 +30,7 @@ static TREE *stopwords3=NULL; static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), FT_STOPWORD *w1, FT_STOPWORD *w2) { - return mi_compare_text(default_charset_info, + return ha_compare_text(default_charset_info, (uchar *)w1->pos,w1->len, (uchar *)w2->pos,w2->len,0,0); } diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c index 1ec91b41218..548c5c13f36 100644 --- a/storage/myisam/ft_update.c +++ b/storage/myisam/ft_update.c @@ -186,7 +186,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || - mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, (uchar*) ftsi2.pos,ftsi2.len,0,0))) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } @@ -214,7 +214,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, error=0; while(old_word->pos && new_word->pos) { - cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); diff --git a/storage/myisam/fulltext.h b/storage/myisam/fulltext.h index d8c74d4e94b..90f6ba3bb8b 100644 --- a/storage/myisam/fulltext.h +++ b/storage/myisam/fulltext.h @@ -21,18 +21,8 @@ #include "myisamdef.h" #include "ft_global.h" -#define HA_FT_WTYPE HA_KEYTYPE_FLOAT -#define HA_FT_WLEN 4 -#define FT_SEGS 2 - -#define ft_sintXkorr(A) mi_sint4korr(A) -#define ft_intXstore(T,A) mi_int4store(T,A) - -extern const HA_KEYSEG ft_keysegs[FT_SEGS]; - int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t); uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *); - diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index eb2f42697ce..e23e220b2d5 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -35,15 +35,15 @@ /* Functions defined in this file */ -static int check_k_link(MI_CHECK *param, MI_INFO *info,uint nr); -static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int check_k_link(HA_CHECK *param, MI_INFO *info,uint nr); +static int chk_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level); static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); -static int writekeys(MI_CHECK *param, MI_INFO *info,byte *buff, +static int writekeys(HA_CHECK *param, MI_INFO *info,byte *buff, my_off_t filepos); -static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key); @@ -57,13 +57,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, reg1 SORT_KEY_BLOCKS *key_block, uchar *key, my_off_t prev_block); static int sort_delete_record(MI_SORT_PARAM *sort_param); -/*static int flush_pending_blocks(MI_CHECK *param);*/ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +/*static int flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length); static ha_checksum mi_byte_checksum(const byte *buf, uint length); -static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share); +static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share); -void myisamchk_init(MI_CHECK *param) +void myisamchk_init(HA_CHECK *param) { bzero((gptr) param,sizeof(*param)); param->opt_follow_links=1; @@ -85,7 +85,7 @@ void myisamchk_init(MI_CHECK *param) /* Check the status flags for the table */ -int chk_status(MI_CHECK *param, register MI_INFO *info) +int chk_status(HA_CHECK *param, register MI_INFO *info) { MYISAM_SHARE *share=info->s; @@ -113,7 +113,7 @@ int chk_status(MI_CHECK *param, register MI_INFO *info) /* Check delete links */ -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag) +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag) { reg2 ha_rows i; uint delete_link_length; @@ -222,7 +222,7 @@ wrong: /* Check delete links in index file */ -static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) +static int check_k_link(HA_CHECK *param, register MI_INFO *info, uint nr) { my_off_t next_link; uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH; @@ -266,7 +266,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) /* Check sizes of files */ -int chk_size(MI_CHECK *param, register MI_INFO *info) +int chk_size(HA_CHECK *param, register MI_INFO *info) { int error=0; register my_off_t skr,size; @@ -342,7 +342,7 @@ int chk_size(MI_CHECK *param, register MI_INFO *info) /* Check keys */ -int chk_key(MI_CHECK *param, register MI_INFO *info) +int chk_key(HA_CHECK *param, register MI_INFO *info) { uint key,found_keys=0,full_text_keys=0,result=0; ha_rows keys; @@ -528,7 +528,7 @@ do_stat: } /* chk_key */ -static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index_down(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { @@ -651,13 +651,13 @@ int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, /* Check if index is ok */ -static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { int flag; uint used_length,comp_flag,nod_flag,key_length=0; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; my_off_t next_page,record; char llbuff[22]; uint diff_pos[2]; @@ -854,7 +854,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo) /* Check that record-link is ok */ -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend) { int error,got_error,flag; uint key,left_length,b_type,field; @@ -864,7 +864,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) byte *record,*to; char llbuff[22],llbuff2[22],llbuff3[22]; ha_checksum intern_record_checksum; - ha_checksum key_checksum[MI_MAX_POSSIBLE_KEY]; + ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; my_bool static_row_size; MI_KEYDEF *keyinfo; MI_BLOCK_INFO block_info; @@ -1296,7 +1296,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) /* Recover old table by reading each record and writing all keys */ /* Save new datafile-name in temp_filename */ -int mi_repair(MI_CHECK *param, register MI_INFO *info, +int mi_repair(HA_CHECK *param, register MI_INFO *info, my_string name, int rep_quick) { int error,got_error; @@ -1306,7 +1306,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, File new_file; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("mi_repair"); @@ -1573,7 +1573,7 @@ err: /* Uppate keyfile when doing repair */ -static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, +static int writekeys(HA_CHECK *param, register MI_INFO *info, byte *buff, my_off_t filepos) { register uint i; @@ -1686,7 +1686,7 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos, /* Tell system that we want all memory for our cache */ -void lock_memory(MI_CHECK *param __attribute__((unused))) +void lock_memory(HA_CHECK *param __attribute__((unused))) { #ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ if (param->opt_lock_memory) @@ -1702,7 +1702,7 @@ void lock_memory(MI_CHECK *param __attribute__((unused))) /* Flush all changed blocks to disk */ -int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) +int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file) { if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) { @@ -1717,12 +1717,12 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) /* Sort index for more efficent reads */ -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name) +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name) { reg2 uint key; reg1 MI_KEYDEF *keyinfo; File new_file; - my_off_t index_pos[MI_MAX_POSSIBLE_KEY]; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; uint r_locks,w_locks; int old_lock; MYISAM_SHARE *share=info->s; @@ -1811,12 +1811,12 @@ err2: /* Sort records recursive using one index */ -static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file) { uint length,nod_flag,used_length, key_length; uchar *buff,*keypos,*endpos; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF]; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF]; my_off_t new_page_pos,next_page; char llbuff[22]; DBUG_ENTER("sort_one_index"); @@ -1929,7 +1929,7 @@ int change_to_newfile(const char * filename, const char * old_ext, /* Locks a whole file */ /* Gives an error-message if file can't be locked */ -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename) { if (my_lock(file,lock_type,start,F_TO_EOF, @@ -1946,7 +1946,7 @@ int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, /* Copy a block between two files */ -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type) { char tmp_buff[IO_SIZE],*buff; @@ -1997,7 +1997,7 @@ err: <>0 Error */ -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { int got_error; @@ -2011,7 +2011,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, HA_KEYSEG *keyseg; ulong *rec_per_key_part; char llbuff[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; DBUG_ENTER("mi_repair_by_sort"); @@ -2367,7 +2367,7 @@ err: <>0 Error */ -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { #ifndef THREAD @@ -2385,7 +2385,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, HA_KEYSEG *keyseg; char llbuff[22]; IO_CACHE_SHARE io_share; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; DBUG_ENTER("mi_repair_parallel"); @@ -2774,7 +2774,7 @@ err: static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_key_read"); @@ -2801,7 +2801,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; FT_WORD *wptr=0; DBUG_ENTER("sort_ft_key_read"); @@ -2858,8 +2858,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) my_off_t pos; byte *to; MI_BLOCK_INFO block_info; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; @@ -3246,8 +3246,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param) ulong block_length,reclength; byte *from; byte block_buff[8]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; DBUG_ENTER("sort_write_record"); @@ -3273,7 +3273,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) { /* must be sure that local buffer is big enough */ reclength=info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,sort_param->record)+ + _mi_calc_total_blob_length(info,sort_param->record)+ ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER; if (sort_info->buff_length < reclength) @@ -3361,8 +3361,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint diff_pos[2]; char llbuff[22],llbuff2[22]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param= sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; int cmp; if (sort_info->key_block->inited) @@ -3423,7 +3423,7 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) { - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_KEY_BLOCKS *key_block=sort_info->key_block; MYISAM_SHARE *share=sort_info->info->s; uint val_off, val_len; @@ -3470,7 +3470,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint a_len, val_off, val_len, error; uchar *p; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_FT_BUF *ft_buf=sort_info->ft_buf; SORT_KEY_BLOCKS *key_block=sort_info->key_block; @@ -3500,7 +3500,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) } get_key_full_length_rdonly(val_off, ft_buf->lastkey); - if (mi_compare_text(sort_param->seg->charset, + if (ha_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, ft_buf->lastkey+1,val_off-1, 0, 0)==0) { @@ -3575,8 +3575,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, MI_KEY_PARAM s_temp; MI_INFO *info; MI_KEYDEF *keyinfo=sort_param->keyinfo; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_insert_key"); anc_buff=key_block->buff; @@ -3654,8 +3654,8 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param) uint i; int old_file,error; uchar *key; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_delete_record"); @@ -3711,7 +3711,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) uint nod_flag,length; my_off_t filepos,key_file_length; SORT_KEY_BLOCKS *key_block; - SORT_INFO *sort_info= sort_param->sort_info; + MI_SORT_INFO *sort_info= sort_param->sort_info; myf myf_rw=sort_info->param->myf_rw; MI_INFO *info=sort_info->info; MI_KEYDEF *keyinfo=sort_param->keyinfo; @@ -3749,7 +3749,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) /* alloc space and pointers for key_blocks */ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length) { reg1 uint i; @@ -3786,7 +3786,7 @@ int test_if_almost_full(MI_INFO *info) /* Recreate table with bigger more alloced record-data */ -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename) { int error; MI_INFO info; @@ -3953,7 +3953,7 @@ end: /* write suffix to data file if neaded */ -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile) { MI_INFO *info=sort_info->info; @@ -3974,7 +3974,7 @@ int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) /* Update state and myisamchk_time of indexfile */ -int update_state_info(MI_CHECK *param, MI_INFO *info,uint update) +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update) { MYISAM_SHARE *share=info->s; @@ -4046,7 +4046,7 @@ err: param->auto_increment is bigger than the biggest key. */ -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair_only) { byte *record; @@ -4278,7 +4278,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, static void -set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share) +set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share) { if ((sort_info->new_data_file_type=share->data_file_type) == COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index 3be998b2c17..d6ffba596f5 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -56,7 +56,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, HA_KEYSEG *keyseg,tmp_keyseg; MI_COLUMNDEF *rec; ulong *rec_per_key_part; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; MI_CREATE_INFO tmp_create_info; DBUG_ENTER("mi_create"); @@ -92,7 +92,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ if (!(rec_per_key_part= - (ulong*) my_malloc((keys + uniques)*MI_MAX_KEY_SEG*sizeof(long), + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), MYF(MY_WME | MY_ZEROFILL)))) DBUG_RETURN(my_errno); @@ -414,7 +414,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, } } /* if HA_FULLTEXT */ key_segs+=keydef->keysegs; - if (keydef->keysegs > MI_MAX_KEY_SEG) + if (keydef->keysegs > HA_MAX_KEY_SEG) { my_errno=HA_WRONG_CREATE_OPTION; goto err; @@ -431,7 +431,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, keydef->block_length= MI_BLOCK_SIZE(length-real_length_diff, pointer,MI_MAX_KEYPTR_SIZE); if (keydef->block_length > MI_MAX_KEY_BLOCK_LENGTH || - length >= MI_MAX_KEY_BUFF) + length >= HA_MAX_KEY_BUFF) { my_errno=HA_WRONG_CREATE_OPTION; goto err; diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c index 2bc99d65dd2..ae464936653 100644 --- a/storage/myisam/mi_delete.c +++ b/storage/myisam/mi_delete.c @@ -160,7 +160,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_RETURN(my_errno=HA_ERR_CRASHED); } if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); DBUG_RETURN(my_errno=ENOMEM); @@ -222,7 +222,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, my_bool last_key; uchar *leaf_buff,*keypos; my_off_t leaf_page,next_block; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("d_search"); DBUG_DUMP("page",(byte*) anc_buff,mi_getint(anc_buff)); @@ -307,7 +307,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { leaf_page=_mi_kpos(nod_flag,keypos); if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); my_errno=ENOMEM; @@ -406,7 +406,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, int ret_value,length; uint a_length,nod_flag,tmp; my_off_t next_page; - uchar keybuff[MI_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; MYISAM_SHARE *share=info->s; MI_KEY_PARAM s_temp; DBUG_ENTER("del"); @@ -423,7 +423,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, { next_page= _mi_kpos(nod_flag,endpos); if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) ret_value= -1; @@ -510,7 +510,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, key_reflength,key_length; my_off_t next_page; - uchar anc_key[MI_MAX_KEY_BUFF],leaf_key[MI_MAX_KEY_BUFF], + uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF], *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key, *after_key; MI_KEY_PARAM s_temp; diff --git a/storage/myisam/mi_delete_all.c b/storage/myisam/mi_delete_all.c index 51f1e44d6d2..1e2fcac4486 100644 --- a/storage/myisam/mi_delete_all.c +++ b/storage/myisam/mi_delete_all.c @@ -22,7 +22,6 @@ int mi_delete_all_rows(MI_INFO *info) { uint i; - char buf[22]; MYISAM_SHARE *share=info->s; MI_STATE_INFO *state=&share->state; DBUG_ENTER("mi_delete_all_rows"); diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index 36d88bd362a..44367fecc1c 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -240,7 +240,7 @@ int _mi_write_blob_record(MI_INFO *info, const byte *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER+1); reclength= (info->s->base.pack_reclength + - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -274,7 +274,7 @@ int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const byte *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER); reclength= (info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -1244,7 +1244,7 @@ err: /* Calc length of blob. Update info in blobs->length */ -ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record) +ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record) { ulong length; MI_BLOB *blob,*end; @@ -1278,7 +1278,7 @@ ulong _mi_calc_blob_length(uint length, const byte *pos) } -void _my_store_blob_length(byte *pos,uint pack_length,uint length) +void _mi_store_blob_length(byte *pos,uint pack_length,uint length) { switch (pack_length) { case 1: @@ -1431,7 +1431,7 @@ int _mi_cmp_dynamic_record(register MI_INFO *info, register const byte *record) if (info->s->base.blobs) { if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)))) + _mi_calc_total_blob_length(info,record)))) DBUG_RETURN(-1); } reclength=_mi_rec_pack(info,buffer,record); diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c index f8463a0b6b0..859912dfdab 100644 --- a/storage/myisam/mi_key.c +++ b/storage/myisam/mi_key.c @@ -448,7 +448,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, /* The above changed info->lastkey2. Inform mi_rnext_same(). */ info->update&= ~HA_STATE_RNEXT_SAME; - _my_store_blob_length(record+keyseg->start, + _mi_store_blob_length(record+keyseg->start, (uint) keyseg->bit_start,length); key+=length; } diff --git a/storage/myisam/mi_log.c b/storage/myisam/mi_log.c index 13842c56828..40566df10d8 100644 --- a/storage/myisam/mi_log.c +++ b/storage/myisam/mi_log.c @@ -134,7 +134,7 @@ void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, if (!info->s->base.blobs) length=info->s->base.reclength; else - length=info->s->base.reclength+ _my_calc_total_blob_length(info,record); + length=info->s->base.reclength+ _mi_calc_total_blob_length(info,record); buff[0]=(char) command; mi_int2store(buff+1,info->dfile); mi_int4store(buff+3,pid); diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index abf1d1ea9a7..a00bd6be008 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -83,8 +83,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) char *disk_cache, *disk_pos, *end_pos; MI_INFO info,*m_info,*old_info; MYISAM_SHARE share_buff,*share; - ulong rec_per_key_part[MI_MAX_POSSIBLE_KEY*MI_MAX_KEY_SEG]; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; ulonglong max_key_file_length, max_data_file_length; DBUG_ENTER("mi_open"); @@ -105,7 +105,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share_buff.state.rec_per_key_part=rec_per_key_part; share_buff.state.key_root=key_root; share_buff.state.key_del=key_del; - share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff)); + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), + dflt_key_cache); DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open", if (strstr(name, "/t1")) @@ -211,7 +212,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) len,MI_BASE_INFO_SIZE)); } disk_pos= (char*) - my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); + mi_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); share->state.state_length=base_pos; if (!(open_flags & HA_OPEN_FOR_REPAIR) && @@ -233,8 +234,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } key_parts+=fulltext_keys*FT_SEGS; - if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY || - key_parts >= MI_MAX_KEY * MI_MAX_KEY_SEG) + if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MI_MAX_KEY || + key_parts >= MI_MAX_KEY * HA_MAX_KEY_SEG) { DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); my_errno=HA_ERR_UNSUPPORTED; @@ -990,7 +991,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base) } -uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) +uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) { base->keystart = mi_sizekorr(ptr); ptr +=8; base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index aa6ea016070..89b0833742e 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -102,6 +102,7 @@ static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length); static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MI_BIT_BUFF *bit_buff); static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); #ifdef HAVE_MMAP static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, uchar *header); @@ -775,7 +776,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, return; } decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length); - _my_store_blob_length((byte*) to,pack_length,length); + _mi_store_blob_length((byte*) to,pack_length,length); memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, sizeof(char*)); bit_buff->blob_pos+=length; @@ -1178,7 +1179,6 @@ static int _mi_read_rnd_mempack_record(MI_INFO*, byte *,my_off_t, my_bool); my_bool _mi_memmap_file(MI_INFO *info) { - byte *file_map; MYISAM_SHARE *share=info->s; DBUG_ENTER("mi_memmap_file"); @@ -1315,7 +1315,7 @@ uint save_pack_length(uint version, byte *block_buff, ulong length) } -uint read_pack_length(uint version, const uchar *buf, ulong *length) +static uint read_pack_length(uint version, const uchar *buf, ulong *length) { if (buf[0] < 254) { diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c index e78f3b11625..1c41c96e95f 100644 --- a/storage/myisam/mi_range.c +++ b/storage/myisam/mi_range.c @@ -217,7 +217,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *keypos, uint *ret_max_key) { uint nod_flag,keynr,max_key; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; end= page+mi_getint(page); nod_flag=mi_test_if_nod(page); diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index a6c2cbd6082..b2c5cba45d9 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -61,7 +61,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag; uchar *keypos,*maxpos; - uchar lastkey[MI_MAX_KEY_BUFF],*buff; + uchar lastkey[HA_MAX_KEY_BUFF],*buff; DBUG_ENTER("_mi_search"); DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", (ulong) pos, nextflag, (ulong) info->lastpos)); @@ -243,7 +243,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, { int flag; uint nod_flag,length,not_used[2]; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; DBUG_ENTER("_mi_seq_search"); LINT_INIT(flag); LINT_INIT(length); @@ -296,7 +296,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, int key_len_skip, seg_len_pack, key_len_left; uchar *end, *kseg, *vseg; uchar *sort_order=keyinfo->seg->charset->sort_order; - uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; uchar *saved_from, *saved_to, *saved_vseg; uint saved_length=0, saved_prefix_len=0; uint length_pack; @@ -919,7 +919,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_ENTER("_mi_get_binary_pack_key"); page= *page_pos; - page_end=page+MI_MAX_KEY_BUFF+1; + page_end=page+HA_MAX_KEY_BUFF+1; start_key=key; /* @@ -1207,7 +1207,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo, { int error; uint nod_flag; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_search_next"); DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu", nextflag, (ulong) info->lastpos, diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c index 0e62b074376..9f7c55444c1 100644 --- a/storage/myisam/mi_test1.c +++ b/storage/myisam/mi_test1.c @@ -628,7 +628,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), key_type= HA_KEYTYPE_VARTEXT1; break; case 'k': - if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH) + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) { fprintf(stderr,"Wrong key length\n"); exit(1); diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index e77a37d853f..fb118f8cad2 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -813,7 +813,7 @@ end: printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete); if (rec_pointer_size) printf("Record pointer size: %d\n",rec_pointer_size); - printf("myisam_block_size: %u\n", myisam_block_size); + printf("myisam_block_size: %lu\n", myisam_block_size); if (key_cacheing) { puts("Key cache used"); diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index b698968127b..2779577e317 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -57,7 +57,7 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record, if (_mi_search_next(info,info->s->keyinfo+def->key, info->lastkey, MI_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - bcmp(info->lastkey, key_buff, MI_UNIQUE_HASH_LENGTH)) + memcmp((char*) info->lastkey, (char*) key_buff, MI_UNIQUE_HASH_LENGTH)) { info->page_changed=1; /* Can't optimize read next */ info->lastpos=lastpos; @@ -213,7 +213,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || type == HA_KEYTYPE_VARTEXT2) { - if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, (uchar *) pos_b, b_length, 0, 1)) return 1; } diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c index 937c9983b45..063c48bb7f0 100644 --- a/storage/myisam/mi_update.c +++ b/storage/myisam/mi_update.c @@ -24,7 +24,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) int flag,key_changed,save_errno; reg3 my_off_t pos; uint i; - uchar old_key[MI_MAX_KEY_BUFF],*new_key; + uchar old_key[HA_MAX_KEY_BUFF],*new_key; bool auto_key_changed=0; ulonglong changed; MYISAM_SHARE *share=info->s; diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c index 5e79b2937cc..6291d176612 100644 --- a/storage/myisam/mi_write.c +++ b/storage/myisam/mi_write.c @@ -334,7 +334,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag, search_key_length; uchar *temp_buff,*keypos; - uchar keybuff[MI_MAX_KEY_BUFF]; + uchar keybuff[HA_MAX_KEY_BUFF]; my_bool was_last_key; my_off_t next_page, dupp_key_pos; DBUG_ENTER("w_search"); @@ -342,7 +342,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) goto err; @@ -533,7 +533,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) + ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) { /* yup. converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) @@ -693,7 +693,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page, { uint keys,length,last_length,key_ref_length; uchar *end,*lastpos,*prevpos; - uchar key_buff[MI_MAX_KEY_BUFF]; + uchar key_buff[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_find_last_pos"); key_ref_length=2; @@ -749,7 +749,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, length,keys; uchar *pos,*buff,*extra_buff; my_off_t next_page,new_pos; - byte tmp_part_key[MI_MAX_KEY_BUFF]; + byte tmp_part_key[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_balance_page"); k_length=keyinfo->keylength; @@ -915,7 +915,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) Probably I can use info->lastkey here, but I'm not sure, and to be safe I'd better use local lastkey. */ - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; uint keylen; MI_KEYDEF *keyinfo; diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index e2c8b446322..192500e432e 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -17,7 +17,6 @@ /* Describe, check and repair of MyISAM tables */ #include "fulltext.h" - #include #include #include @@ -72,9 +71,9 @@ static const char *myisam_stats_method_str="nulls_unequal"; static void get_options(int *argc,char * * *argv); static void print_version(void); static void usage(void); -static int myisamchk(MI_CHECK *param, char *filename); -static void descript(MI_CHECK *param, register MI_INFO *info, my_string name); -static int mi_sort_records(MI_CHECK *param, register MI_INFO *info, +static int myisamchk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MI_INFO *info, my_string name); +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, my_string name, uint sort_key, my_bool write_info, my_bool update_index); static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, @@ -82,7 +81,7 @@ static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, my_off_t page,uchar *buff,uint sortkey, File new_file, my_bool update_index); -MI_CHECK check_param; +HA_CHECK check_param; /* Main program */ @@ -704,7 +703,7 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; - enum_mi_stats_method method_conv; + enum_handler_stats_method method_conv; myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { @@ -801,7 +800,7 @@ static void get_options(register int *argc,register char ***argv) /* Check table */ -static int myisamchk(MI_CHECK *param, my_string filename) +static int myisamchk(HA_CHECK *param, my_string filename) { int error,lock_type,recreate; int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); @@ -1206,7 +1205,7 @@ end2: /* Write info about table */ -static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) +static void descript(HA_CHECK *param, register MI_INFO *info, my_string name) { uint key,keyseg_nr,field,start; reg3 MI_KEYDEF *keyinfo; @@ -1471,7 +1470,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) /* Sort records according to one key */ -static int mi_sort_records(MI_CHECK *param, +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, my_string name, uint sort_key, my_bool write_info, @@ -1485,7 +1484,7 @@ static int mi_sort_records(MI_CHECK *param, ha_rows old_record_count; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("sort_records"); @@ -1660,10 +1659,10 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info, uint nod_flag,used_length,key_length; uchar *temp_buff,*keypos,*endpos; my_off_t next_page,rec_pos; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; char llbuff[22]; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_record_index"); nod_flag=mi_test_if_nod(buff); @@ -1751,7 +1750,7 @@ err: static int not_killed= 0; -volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) +volatile int *killed_ptr(HA_CHECK *param __attribute__((unused))) { return ¬_killed; /* always NULL */ } @@ -1759,7 +1758,7 @@ volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) /* print warnings and errors */ /* VARARGS */ -void mi_check_print_info(MI_CHECK *param __attribute__((unused)), +void mi_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) { va_list args; @@ -1772,7 +1771,7 @@ void mi_check_print_info(MI_CHECK *param __attribute__((unused)), /* VARARGS */ -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("mi_check_print_warning"); @@ -1797,7 +1796,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) /* VARARGS */ -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("mi_check_print_error"); diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index caf6254c321..44b868fedb2 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -16,8 +16,8 @@ /* This file is included by all internal myisam files */ -#include "myisam.h" /* Structs & some defines */ -#include "myisampack.h" /* packing of keys */ +#include "myisam.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ #include #ifdef THREAD #include @@ -27,15 +27,16 @@ #endif #if defined(my_write) && !defined(MAP_TO_USE_RAID) -#undef my_write /* undef map from my_nosys; We need test-if-disk full */ +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write #endif typedef struct st_mi_status_info { - ha_rows records; /* Rows in table */ - ha_rows del; /* Removed rows */ - my_off_t empty; /* lost space in datafile */ - my_off_t key_empty; /* lost space in indexfile */ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ my_off_t key_file_length; my_off_t data_file_length; ha_checksum checksum; @@ -43,333 +44,293 @@ typedef struct st_mi_status_info typedef struct st_mi_state_info { - struct { /* Fileheader */ + struct + { /* Fileheader */ uchar file_version[4]; uchar options[2]; uchar header_length[2]; uchar state_info_length[2]; uchar base_info_length[2]; uchar base_pos[2]; - uchar key_parts[2]; /* Key parts */ - uchar unique_key_parts[2]; /* Key parts + unique parts */ - uchar keys; /* number of keys in file */ - uchar uniques; /* number of UNIQUE definitions */ - uchar language; /* Language for indexes */ - uchar max_block_size; /* max keyblock size */ + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar max_block_size; /* max keyblock size */ uchar fulltext_keys; uchar not_used; /* To align to 8 */ } header; MI_STATUS_INFO state; - ha_rows split; /* number of split blocks */ - my_off_t dellink; /* Link to next removed block */ + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ ulonglong auto_increment; - ulong process; /* process that updated table last */ - ulong unique; /* Unique number for this process */ - ulong update_count; /* Updated for each write lock */ + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ ulong status; ulong *rec_per_key_part; - my_off_t *key_root; /* Start of key trees */ - my_off_t *key_del; /* delete links for trees */ - my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ + my_off_t *key_root; /* Start of key trees */ + my_off_t *key_del; /* delete links for trees */ + my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ - ulong sec_index_changed; /* Updated when new sec_index */ - ulong sec_index_used; /* which extra index are in use */ - ulonglong key_map; /* Which keys are in use */ - ulong version; /* timestamp of create */ - time_t create_time; /* Time when created database */ - time_t recover_time; /* Time for last recover */ - time_t check_time; /* Time for last check */ - uint sortkey; /* sorted by this key (not used) */ + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ uint open_count; - uint8 changed; /* Changed since myisamchk */ + uint8 changed; /* Changed since myisamchk */ /* the following isn't saved on disk */ - uint state_diff_length; /* Should be 0 */ - uint state_length; /* Length of state header in file */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ ulong *key_info; } MI_STATE_INFO; -#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) -#define MI_STATE_KEY_SIZE 8 +#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) +#define MI_STATE_KEY_SIZE 8 #define MI_STATE_KEYBLOCK_SIZE 8 -#define MI_STATE_KEYSEG_SIZE 4 -#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) -#define MI_KEYDEF_SIZE (2+ 5*2) -#define MI_UNIQUEDEF_SIZE (2+1+1) -#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) -#define MI_COLUMNDEF_SIZE (2*3+1) -#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) -#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ +#define MI_STATE_KEYSEG_SIZE 4 +#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) +#define MI_KEYDEF_SIZE (2+ 5*2) +#define MI_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MI_COLUMNDEF_SIZE (2*3+1) +#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) +#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ typedef struct st_mi_base_info { - my_off_t keystart; /* Start of keys */ + my_off_t keystart; /* Start of keys */ my_off_t max_data_file_length; my_off_t max_key_file_length; my_off_t margin_key_file_length; - ha_rows records,reloc; /* Create information */ - ulong mean_row_length; /* Create information */ - ulong reclength; /* length of unpacked record */ - ulong pack_reclength; /* Length of full packed rec. */ + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec. */ ulong min_pack_length; - ulong max_pack_length; /* Max possibly length of packed rec.*/ + ulong max_pack_length; /* Max possibly length of packed rec.*/ ulong min_block_length; - ulong fields, /* fields in table */ - pack_fields; /* packed fields in table */ - uint rec_reflength; /* = 2-8 */ - uint key_reflength; /* = 2-8 */ - uint keys; /* same as in state.header */ - uint auto_key; /* Which key-1 is a auto key */ - uint blobs; /* Number of blobs */ - uint pack_bits; /* Length of packed bits */ - uint max_key_block_length; /* Max block length */ - uint max_key_length; /* Max key length */ + ulong fields, /* fields in table */ + pack_fields; /* packed fields in table */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + uint pack_bits; /* Length of packed bits */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ /* Extra allocation when using dynamic record format */ uint extra_alloc_bytes; uint extra_alloc_procent; /* Info about raid */ - uint raid_type,raid_chunks; + uint raid_type, raid_chunks; ulong raid_chunksize; /* The following are from the header */ - uint key_parts,all_key_parts; + uint key_parts, all_key_parts; } MI_BASE_INFO; - /* Structs used intern in database */ + /* Structs used intern in database */ -typedef struct st_mi_blob /* Info of record */ +typedef struct st_mi_blob /* Info of record */ { - ulong offset; /* Offset to blob in record */ - uint pack_length; /* Type of packed length */ - ulong length; /* Calc:ed for each record */ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ } MI_BLOB; -typedef struct st_mi_isam_pack { +typedef struct st_mi_isam_pack +{ ulong header_length; uint ref_length; uchar version; } MI_PACK; -#define MAX_NONMAPPED_INSERTS 1000 +#define MAX_NONMAPPED_INSERTS 1000 -typedef struct st_mi_isam_share { /* Shared between opens */ +typedef struct st_mi_isam_share +{ /* Shared between opens */ MI_STATE_INFO state; MI_BASE_INFO base; - MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ - MI_KEYDEF *keyinfo; /* Key definitions */ - MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ - HA_KEYSEG *keyparts; /* key part info */ - MI_COLUMNDEF *rec; /* Pointer to field information */ - MI_PACK pack; /* Data about packed records */ - MI_BLOB *blobs; /* Pointer to blobs */ - char *unique_file_name; /* realpath() of index file */ - char *data_file_name, /* Resolved path names from symlinks */ - *index_file_name; - byte *file_map; /* mem-map of file if possible */ - KEY_CACHE *key_cache; /* ref to the current key cache */ + MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ + MI_KEYDEF *keyinfo; /* Key definitions */ + MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MI_COLUMNDEF *rec; /* Pointer to field information */ + MI_PACK pack; /* Data about packed records */ + MI_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name, /* Resolved path names from symlinks */ + *index_file_name; + byte *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache */ MI_DECODE_TREE *decode_trees; uint16 *decode_tables; - int (*read_record)(struct st_myisam_info*, my_off_t, byte*); - int (*write_record)(struct st_myisam_info*, const byte*); - int (*update_record)(struct st_myisam_info*, my_off_t, const byte*); - int (*delete_record)(struct st_myisam_info*); - int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool); - int (*compare_record)(struct st_myisam_info*, const byte *); - ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *); - int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *, - const byte *record, my_off_t pos); - uint (*file_read)(MI_INFO *, byte *, uint, my_off_t, myf); - uint (*file_write)(MI_INFO *, byte *, uint, my_off_t, myf); + int(*read_record) (struct st_myisam_info *, my_off_t, byte *); + int(*write_record) (struct st_myisam_info *, const byte *); + int(*update_record) (struct st_myisam_info *, my_off_t, const byte *); + int(*delete_record) (struct st_myisam_info *); + int(*read_rnd) (struct st_myisam_info *, byte *, my_off_t, my_bool); + int(*compare_record) (struct st_myisam_info *, const byte *); + ha_checksum(*calc_checksum) (struct st_myisam_info *, const byte *); + int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *, + const byte * record, my_off_t pos); + uint(*file_read) (MI_INFO *, byte *, uint, my_off_t, myf); + uint(*file_write) (MI_INFO *, byte *, uint, my_off_t, myf); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_process; /* processid */ - ulong last_process; /* For table-change-check */ - ulong last_version; /* Version on start */ - ulong options; /* Options used */ - ulong min_pack_length; /* Theese are used by packed data */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* Theese are used by packed data */ ulong max_pack_length; ulong state_diff_length; - uint rec_reflength; /* rec_reflength in use now */ - uint unique_name_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; uint32 ftparsers; /* Number of distinct ftparsers + 1 */ - File kfile; /* Shared keyfile */ - File data_file; /* Shared data file */ - int mode; /* mode of file on open */ - uint reopen; /* How many times reopened */ - uint w_locks,r_locks,tot_locks; /* Number of read/write locks */ - uint blocksize; /* blocksize of keyfile */ + File kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint blocksize; /* blocksize of keyfile */ myf write_flag; enum data_file_type data_file_type; - my_bool changed, /* If changed since lock */ - global_changed, /* If changed since open */ - not_flushed, - temporary,delay_key_write, - concurrent_insert; + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, temporary, delay_key_write, concurrent_insert; #ifdef THREAD THR_LOCK lock; - pthread_mutex_t intern_lock; /* Locking for use with _locking */ + pthread_mutex_t intern_lock; /* Locking for use with _locking */ rw_lock_t *key_root_lock; #endif my_off_t mmaped_length; - uint nonmmaped_inserts; /* counter of writing in non-mmaped - area */ + /* counter of writing in non-mmaped area */ + uint nonmmaped_inserts; rw_lock_t mmap_lock; } MYISAM_SHARE; typedef uint mi_bit_type; -typedef struct st_mi_bit_buff { /* Used for packing of record */ +typedef struct st_mi_bit_buff +{ /* Used for packing of record */ mi_bit_type current_byte; uint bits; - uchar *pos,*end,*blob_pos,*blob_end; + uchar *pos, *end, *blob_pos, *blob_end; uint error; } MI_BIT_BUFF; -struct st_myisam_info { - MYISAM_SHARE *s; /* Shared between open:s */ - MI_STATUS_INFO *state,save_state; - MI_BLOB *blobs; /* Pointer to blobs */ - MI_BIT_BUFF bit_buff; +struct st_myisam_info +{ + MYISAM_SHARE *s; /* Shared between open:s */ + MI_STATUS_INFO *state, save_state; + MI_BLOB *blobs; /* Pointer to blobs */ + MI_BIT_BUFF bit_buff; /* accumulate indexfile changes between write's */ - TREE *bulk_insert; + TREE *bulk_insert; DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ - MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ - char *filename; /* parameter to open filename */ - uchar *buff, /* Temp area for key */ - *lastkey,*lastkey2; /* Last used search key */ - uchar *first_mbr_key; /* Searhed spatial key */ - byte *rec_buff; /* Tempbuff for recordpack */ - uchar *int_keypos, /* Save position for next/previous */ - *int_maxpos; /* -""- */ - uint int_nod_flag; /* -""- */ - uint32 int_keytree_version; /* -""- */ - int (*read_record)(struct st_myisam_info*, my_off_t, byte*); + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + char *filename; /* parameter to open filename */ + uchar *buff, /* Temp area for key */ + *lastkey, *lastkey2; /* Last used search key */ + uchar *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Tempbuff for recordpack */ + uchar *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int(*read_record) (struct st_myisam_info *, my_off_t, byte *); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_unique; /* uniq filenumber or thread */ - ulong last_unique; /* last unique number */ - ulong this_loop; /* counter for this open */ - ulong last_loop; /* last used counter */ - my_off_t lastpos, /* Last record position */ - nextpos; /* Position to next record */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + my_off_t lastpos, /* Last record position */ + nextpos; /* Position to next record */ my_off_t save_lastpos; - my_off_t pos; /* Intern variable */ - my_off_t last_keypage; /* Last key page read */ - my_off_t last_search_keypage; /* Last keypage when searching */ + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; ha_checksum checksum; - /* QQ: the folloing two xxx_length fields should be removed, - as they are not compatible with parallel repair */ - ulong packed_length,blob_length; /* Length of found, packed record */ - int dfile; /* The datafile */ - uint opt_flag; /* Optim. for space/speed */ - uint update; /* If file changed since open */ - int lastinx; /* Last used index */ - uint lastkey_length; /* Length of key in lastkey */ - uint last_rkey_length; /* Last length in mi_rkey() */ + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + int dfile; /* The datafile */ + uint opt_flag; /* Optim. for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in mi_rkey() */ enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ - uint save_lastkey_length; - uint pack_key_length; /* For MYISAMMRG */ - int errkey; /* Got last error on this key */ - int lock_type; /* How database was locked */ - int tmp_lock_type; /* When locked by readinfo */ - uint data_changed; /* Somebody has changed data */ - uint save_update; /* When using KEY_READ */ - int save_lastinx; - LIST open_list; - IO_CACHE rec_cache; /* When cacheing records */ - uint preload_buff_size; /* When preloading indexes */ - myf lock_wait; /* is 0 or MY_DONT_WAIT */ - my_bool was_locked; /* Was locked in panic */ - my_bool append_insert_at_end; /* Set if concurrent insert */ + uint save_lastkey_length; + uint pack_key_length; /* For MYISAMMRG */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; - my_bool page_changed; /* If info->buff can't be used for rnext */ - my_bool buff_used; /* If info->buff has to be reread for rnext */ - my_bool once_flags; /* For MYISAMMRG */ + /* If info->buff can't be used for rnext */ + my_bool page_changed; + /* If info->buff has to be reread for rnext */ + my_bool buff_used; + my_bool once_flags; /* For MYISAMMRG */ #ifdef THREAD THR_LOCK_DATA lock; #endif - uchar *rtree_recursion_state; /* For RTREE */ - int rtree_recursion_depth; + uchar *rtree_recursion_state; /* For RTREE */ + int rtree_recursion_depth; }; -typedef struct st_buffpek { - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* Key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; +#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ +#define F_EXTRA_LCK -1 -typedef struct st_mi_sort_param -{ - pthread_t thr; - IO_CACHE read_cache, tempfile, tempfile_for_exceptions; - DYNAMIC_ARRAY buffpek; - - /* - The next two are used to collect statistics, see update_key_parts for - description. - */ - ulonglong unique[MI_MAX_KEY_SEG+1]; - ulonglong notnull[MI_MAX_KEY_SEG+1]; - - my_off_t pos,max_pos,filepos,start_recpos; - uint key, key_length,real_key_length,sortbuff_size; - uint maxbuffers, keys, find_length, sort_keys_length; - my_bool fix_datafile, master; - MI_KEYDEF *keyinfo; - HA_KEYSEG *seg; - SORT_INFO *sort_info; - uchar **sort_keys; - byte *rec_buff; - void *wordlist, *wordptr; - char *record; - MY_TMPDIR *tmpdir; - int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); - int (*key_read)(struct st_mi_sort_param *,void *); - int (*key_write)(struct st_mi_sort_param *, const void *); - void (*lock_in_memory)(MI_CHECK *); - NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, - uint , struct st_buffpek *, IO_CACHE *); - NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); - NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, - uint, uint); -} MI_SORT_PARAM; - /* Some defines used by isam-funktions */ - -#define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ -#define F_EXTRA_LCK -1 - - /* bits in opt_flag */ -#define MEMMAP_USED 32 +/* bits in opt_flag */ +#define MEMMAP_USED 32 #define REMEMBER_OLD_POS 64 -#define WRITEINFO_UPDATE_KEYFILE 1 -#define WRITEINFO_NO_UNLOCK 2 +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 - /* once_flags */ +/* once_flags */ #define USE_PACKED_KEYS 1 #define RRND_PRESERVE_LASTINX 2 - /* bits in state.changed */ - -#define STATE_CHANGED 1 -#define STATE_CRASHED 2 +/* bits in state.changed */ +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 #define STATE_CRASHED_ON_REPAIR 4 -#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_ANALYZED 8 #define STATE_NOT_OPTIMIZED_KEYS 16 -#define STATE_NOT_SORTED_PAGES 32 +#define STATE_NOT_SORTED_PAGES 32 - /* options to mi_read_cache */ +/* options to mi_read_cache */ +#define READING_NEXT 1 +#define READING_HEADER 2 -#define READING_NEXT 1 -#define READING_HEADER 2 - -#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) +#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) #define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ - mi_int2store(x,boh); } + mi_int2store(x,boh); } #define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) #define mi_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED #define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } @@ -380,13 +341,6 @@ typedef struct st_mi_sort_param /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ -#define store_key_length_inc(key,length) \ -{ if ((length) < 255) \ - { *(key)++=(length); } \ - else \ - { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ -} - #define store_key_length(key,length) \ { if ((length) < 255) \ { *(key)=(length); } \ @@ -410,39 +364,39 @@ typedef struct st_mi_sort_param #define get_pack_length(length) ((length) >= 255 ? 3 : 1) -#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ -#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ -#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) -#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ +#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ +#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) +#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ #define MI_BLOCK_INFO_HEADER_LENGTH 20 -#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ -#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) -#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) -#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ -#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ -#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) +#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) +#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) #define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) -#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ -#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ -#define PACK_TYPE_SPACE_FIELDS 2 -#define PACK_TYPE_ZERO_FILL 4 -#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ -#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) +#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) #define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/myisam_block_size+1)*myisam_block_size) -#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ -#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ +#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ -#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ #define MI_MIN_ROWS_TO_USE_BULK_INSERT 100 #define MI_MIN_ROWS_TO_DISABLE_INDEXES 100 #define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10 /* The UNIQUE check is done with a hashed long key */ -#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT #define mi_unique_store(A,B) mi_int4store((A),(B)) #ifdef THREAD @@ -454,174 +408,181 @@ extern pthread_mutex_t THR_LOCK_myisam; #define rw_unlock(A) {} #endif - /* Some extern variables */ +/* Some extern variables */ extern LIST *myisam_open_list; -extern uchar NEAR myisam_file_magic[],NEAR myisam_pack_file_magic[]; -extern uint NEAR myisam_read_vec[],NEAR myisam_readnext_vec[]; +extern uchar NEAR myisam_file_magic[], NEAR myisam_pack_file_magic[]; +extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[]; extern uint myisam_quick_table_bits; extern File myisam_log_file; extern ulong myisam_pid; - /* This is used by _mi_calc_xxx_key_length och _mi_store_key */ +/* This is used by _mi_calc_xxx_key_length och _mi_store_key */ typedef struct st_mi_s_param { - uint ref_length,key_length, - n_ref_length, - n_length, - totlength, - part_of_prev_key,prev_length,pack_marker; - uchar *key, *prev_key,*next_key_pos; - bool store_not_null; + uint ref_length, key_length, + n_ref_length, + n_length, totlength, part_of_prev_key, prev_length, pack_marker; + uchar *key, *prev_key, *next_key_pos; + bool store_not_null; } MI_KEY_PARAM; - /* Prototypes for intern functions */ +/* Prototypes for intern functions */ -extern int _mi_read_dynamic_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_write_dynamic_record(MI_INFO*, const byte*); -extern int _mi_update_dynamic_record(MI_INFO*, my_off_t, const byte*); +extern int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_write_dynamic_record(MI_INFO *, const byte *); +extern int _mi_update_dynamic_record(MI_INFO *, my_off_t, const byte *); extern int _mi_delete_dynamic_record(MI_INFO *info); -extern int _mi_cmp_dynamic_record(MI_INFO *info,const byte *record); -extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *,my_off_t, my_bool); -extern int _mi_write_blob_record(MI_INFO*, const byte*); -extern int _mi_update_blob_record(MI_INFO*, my_off_t, const byte*); -extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos,byte *buf); -extern int _mi_write_static_record(MI_INFO*, const byte*); -extern int _mi_update_static_record(MI_INFO*, my_off_t, const byte*); +extern int _mi_cmp_dynamic_record(MI_INFO *info, const byte *record); +extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *, my_off_t, my_bool); +extern int _mi_write_blob_record(MI_INFO *, const byte *); +extern int _mi_update_blob_record(MI_INFO *, my_off_t, const byte *); +extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_write_static_record(MI_INFO *, const byte *); +extern int _mi_update_static_record(MI_INFO *, my_off_t, const byte *); extern int _mi_delete_static_record(MI_INFO *info); -extern int _mi_cmp_static_record(MI_INFO *info,const byte *record); -extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool); -extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length); +extern int _mi_cmp_static_record(MI_INFO *info, const byte *record); +extern int _mi_read_rnd_static_record(MI_INFO *, byte *, my_off_t, my_bool); +extern int _mi_ck_write(MI_INFO *info, uint keynr, uchar *key, uint length); extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, uint key_length, my_off_t *root, uint comp_flag); -extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root); -extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *anc_buff,uchar *key_pos,uchar *key_buff, - uchar *father_buff, uchar *father_keypos, - my_off_t father_page, my_bool insert_last); -extern int _mi_split_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *buff,uchar *key_buff, my_bool insert_last); -extern uchar *_mi_find_half_pos(uint nod_flag,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint *return_key_length, - uchar **after_key); -extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos,uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); -void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +extern int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + my_off_t *root); +extern int _mi_insert(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *anc_buff, uchar *key_pos, uchar *key_buff, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _mi_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *buff, uchar *key_buff, my_bool insert_last); +extern uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, + uint *return_key_length, uchar ** after_key); +extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); +void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #ifdef NOT_USED -void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #endif -void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); -extern int _mi_ck_delete(MI_INFO *info,uint keynr,uchar *key,uint key_length); -extern int _mi_readinfo(MI_INFO *info,int lock_flag,int check_keybuffer); -extern int _mi_writeinfo(MI_INFO *info,uint options); +extern int _mi_ck_delete(MI_INFO *info, uint keynr, uchar *key, + uint key_length); +extern int _mi_readinfo(MI_INFO *info, int lock_flag, int check_keybuffer); +extern int _mi_writeinfo(MI_INFO *info, uint options); extern int _mi_test_if_changed(MI_INFO *info); extern int _mi_mark_file_changed(MI_INFO *info); extern int _mi_decrement_open_count(MI_INFO *info); -extern int _mi_check_index(MI_INFO *info,int inx); -extern int _mi_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uint key_len, - uint nextflag,my_off_t pos); -extern int _mi_bin_search(struct st_myisam_info *info,MI_KEYDEF *keyinfo, - uchar *page,uchar *key,uint key_len,uint comp_flag, - uchar * *ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key); -extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos); -extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key); +extern int _mi_check_index(MI_INFO *info, int inx); +extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _mi_bin_search(struct st_myisam_info *info, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar **ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_seq_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_prefix_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern my_off_t _mi_kpos(uint nod_flag, uchar *after_key); +extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key); extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr); -extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos); -extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b, - uint key_length,uint nextflag,uint *diff_length); -extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); -extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); +extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length, uint nextflag, uint *diff_length); +extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag, + uchar **page, uchar *key); +extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page, + uchar *key); extern uint _mi_get_binary_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, - uchar **page_pos, uchar *key); -extern uchar *_mi_get_last_key(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *keypos, - uchar *lastkey,uchar *endpos, - uint *return_key_length); + uchar ** page_pos, uchar *key); +extern uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *keypos, uchar *lastkey, uchar *endpos, + uint *return_key_length); extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, uint *return_key_length); -extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key); + uchar *key, uchar *keypos, + uint *return_key_length); +extern uint _mi_keylength(MI_KEYDEF *keyinfo, uchar *key); extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, - HA_KEYSEG *end); -extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from); -extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uint key_length,uint nextflag,my_off_t pos); -extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level,uchar *buff,int return_buffer); -extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level, uchar *buff); -extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos, - int level); -extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level); -extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key, - const byte *record,my_off_t filepos); -extern uint _mi_pack_key(MI_INFO *info,uint keynr,uchar *key,uchar *old, - uint key_length, HA_KEYSEG **last_used_keyseg); -extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_read_cache(IO_CACHE *info,byte *buff,my_off_t pos, - uint length,int re_read_if_possibly); -extern void update_auto_increment(MI_INFO *info,const byte *record); + HA_KEYSEG *end); +extern uchar *_mi_move_key(MI_KEYDEF *keyinfo, uchar *to, uchar *from); +extern int _mi_search_next(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uint nextflag, my_off_t pos); +extern int _mi_search_first(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern int _mi_search_last(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern uchar *_mi_fetch_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff, + int return_buffer); +extern int _mi_write_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, + int level, uchar *buff); +extern int _mi_dispose(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _mi_new(MI_INFO *info, MI_KEYDEF *keyinfo, int level); +extern uint _mi_make_key(MI_INFO *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos); +extern uint _mi_pack_key(MI_INFO *info, uint keynr, uchar *key, uchar *old, + uint key_length, HA_KEYSEG ** last_used_keyseg); +extern int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, + uint length, int re_read_if_possibly); +extern void update_auto_increment(MI_INFO *info, const byte *record); -extern byte *mi_alloc_rec_buff(MI_INFO *,ulong, byte**); +extern byte *mi_alloc_rec_buff(MI_INFO *, ulong, byte **); #define mi_get_rec_buff_ptr(info,buf) \ ((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \ (buf) - MI_REC_BUFF_OFFSET : (buf)) #define mi_get_rec_buff_len(info,buf) \ (*((uint32 *)(mi_get_rec_buff_ptr(info,buf)))) -extern ulong _mi_rec_unpack(MI_INFO *info,byte *to,byte *from, - ulong reclength); -extern my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *packpos, +extern ulong _mi_rec_unpack(MI_INFO *info, byte *to, byte *from, + ulong reclength); +extern my_bool _mi_rec_check(MI_INFO *info, const char *record, byte *packpos, ulong packed_length, my_bool with_checkum); -extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length, - my_off_t next_filepos,byte **record, - ulong *reclength,int *flag); -extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, - uint length); -extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); -extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool); -extern int _mi_pack_rec_unpack(MI_INFO *info,byte *to,byte *from, - ulong reclength); -extern ulonglong mi_safe_mul(ulonglong a,ulonglong b); +extern int _mi_write_part_record(MI_INFO *info, my_off_t filepos, ulong length, + my_off_t next_filepos, byte ** record, + ulong *reclength, int *flag); +extern void _mi_print_key(FILE *stream, HA_KEYSEG *keyseg, const uchar *key, + uint length); +extern my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys); +extern int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_read_rnd_pack_record(MI_INFO *, byte *, my_off_t, my_bool); +extern int _mi_pack_rec_unpack(MI_INFO *info, byte *to, byte *from, + ulong reclength); +extern ulonglong mi_safe_mul(ulonglong a, ulonglong b); extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, - const byte *oldrec, const byte *newrec, my_off_t pos); + const byte *oldrec, const byte *newrec, + my_off_t pos); struct st_sort_info; -typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ +typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */ +{ uchar header[MI_BLOCK_INFO_HEADER_LENGTH]; ulong rec_len; ulong data_len; @@ -634,35 +595,37 @@ typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ uint offset; } MI_BLOCK_INFO; - /* bits in return from _mi_get_block_info */ + /* bits in return from _mi_get_block_info */ -#define BLOCK_FIRST 1 -#define BLOCK_LAST 2 -#define BLOCK_DELETED 4 -#define BLOCK_ERROR 8 /* Wrong data */ -#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ -#define BLOCK_FATAL_ERROR 32 /* hardware-error */ +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ -#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ -#define MAXERR 20 -#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ -#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE -#define INDEX_TMP_EXT ".TMM" -#define DATA_TMP_EXT ".TMD" +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" -#define UPDATE_TIME 1 -#define UPDATE_STAT 2 -#define UPDATE_SORT 4 -#define UPDATE_AUTO_INC 8 -#define UPDATE_OPEN_COUNT 16 +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 -#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) -#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) -#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) -#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) -enum myisam_log_commands { - MI_LOG_OPEN,MI_LOG_WRITE,MI_LOG_UPDATE,MI_LOG_DELETE,MI_LOG_CLOSE,MI_LOG_EXTRA,MI_LOG_LOCK,MI_LOG_DELETE_ALL +enum myisam_log_commands +{ + MI_LOG_OPEN, MI_LOG_WRITE, MI_LOG_UPDATE, MI_LOG_DELETE, MI_LOG_CLOSE, + MI_LOG_EXTRA, MI_LOG_LOCK, MI_LOG_DELETE_ALL }; #define myisam_log(a,b,c,d) if (myisam_log_file >= 0) _myisam_log(a,b,c,d) @@ -672,34 +635,32 @@ enum myisam_log_commands { #define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0) #define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1) -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif - -extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t); -extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from); + extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t); +extern uint _mi_rec_pack(MI_INFO *info, byte *to, const byte *from); extern uint _mi_pack_get_block_info(MI_INFO *, MI_BLOCK_INFO *, File, my_off_t); -extern void _my_store_blob_length(byte *pos,uint pack_length,uint length); -extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info, - const byte *buffert,uint length); +extern void _mi_store_blob_length(byte *pos, uint pack_length, uint length); +extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info, + const byte *buffert, uint length); extern void _myisam_log_command(enum myisam_log_commands command, - MI_INFO *info, const byte *buffert, - uint length, int result); -extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info, - const byte *record,my_off_t filepos, - int result); + MI_INFO *info, const byte *buffert, + uint length, int result); +extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, + const byte *record, my_off_t filepos, + int result); extern void mi_report_error(int errcode, const char *file_name); extern my_bool _mi_memmap_file(MI_INFO *info); extern void _mi_unmap_file(MI_INFO *info); extern uint save_pack_length(uint version, byte *block_buff, ulong length); -extern uint read_pack_length(uint version, const uchar *buf, ulong *length); extern uint calc_pack_length(uint version, ulong length); extern uint mi_mmap_pread(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_mmap_pwrite(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pread(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pwrite(MI_INFO *info, byte *Buffer, uint Count, my_off_t offset, myf MyFlags); @@ -707,7 +668,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite); uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state); uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead); uint mi_base_info_write(File file, MI_BASE_INFO *base); -uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base); +uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base); int mi_keyseg_write(File file, const HA_KEYSEG *keyseg); char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg); uint mi_keydef_write(File file, MI_KEYDEF *keydef); @@ -719,22 +680,22 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo); extern int mi_disable_indexes(MI_INFO *info); extern int mi_enable_indexes(MI_INFO *info); extern int mi_indexes_are_disabled(MI_INFO *info); -ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record); +ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record); ha_checksum mi_checksum(MI_INFO *info, const byte *buf); ha_checksum mi_static_checksum(MI_INFO *info, const byte *buf); my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record, - ha_checksum unique_hash, my_off_t pos); + ha_checksum unique_hash, my_off_t pos); ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *buf); int _mi_cmp_static_unique(MI_INFO *info, MI_UNIQUEDEF *def, - const byte *record, my_off_t pos); + const byte *record, my_off_t pos); int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def, - const byte *record, my_off_t pos); + const byte *record, my_off_t pos); int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, - my_bool null_are_equal); -void mi_get_status(void* param, int concurrent_insert); -void mi_update_status(void* param); -void mi_copy_status(void* to,void *from); -my_bool mi_check_status(void* param); + my_bool null_are_equal); +void mi_get_status(void *param, int concurrent_insert); +void mi_update_status(void *param); +void mi_copy_status(void *to, void *from); +my_bool mi_check_status(void *param); void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows); extern MI_INFO *test_if_reopen(char *filename); @@ -742,24 +703,18 @@ my_bool check_table_is_closed(const char *name, const char *where); int mi_open_datafile(MI_INFO *info, MYISAM_SHARE *share, File file_to_dup); int mi_open_keyfile(MYISAM_SHARE *share); void mi_setup_functions(register MYISAM_SHARE *share); +my_bool mi_dynmap_file(MI_INFO *info, my_off_t size); +void mi_remap_file(MI_INFO *info, my_off_t size); /* Functions needed by mi_check */ -volatile int *killed_ptr(MI_CHECK *param); -void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...)); -void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...)); -void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...)); -int flush_pending_blocks(MI_SORT_PARAM *param); -int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); -int thr_write_keys(MI_SORT_PARAM *sort_param); +volatile int *killed_ptr(HA_CHECK *param); +void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void mi_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); #ifdef THREAD pthread_handler_t thr_find_all_keys(void *arg); #endif -int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file); - -int sort_write_record(MI_SORT_PARAM *sort_param); -int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); - +int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file); #ifdef __cplusplus } #endif - diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c index de55b86252c..2b36e6438c1 100644 --- a/storage/myisam/myisamlog.c +++ b/storage/myisam/myisamlog.c @@ -806,7 +806,7 @@ static int find_record_with_key(struct file_info *file_info, byte *record) { uint key; MI_INFO *info=file_info->isam; - uchar tmp_key[MI_MAX_KEY_BUFF]; + uchar tmp_key[HA_MAX_KEY_BUFF]; for (key=0 ; key < info->s->base.keys ; key++) { diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index e80a3ffacd9..18313603394 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -2726,6 +2726,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) break; } case FIELD_LAST: + case FIELD_enum_val_count: abort(); /* Impossible */ } start_pos+=count->max_zero_fill; diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c index 97554dca4e6..9803b4e110b 100644 --- a/storage/myisam/rt_index.c +++ b/storage/myisam/rt_index.c @@ -540,7 +540,7 @@ static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, int res; if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length + - MI_MAX_KEY_BUFF))) + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; return -1; @@ -654,7 +654,7 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key, uint nod_flag = info->s->base.key_reflength; if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length + - MI_MAX_KEY_BUFF))) + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; return -1; diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index c9562461f56..443d2b11afe 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -16,7 +16,7 @@ /* Creates a index for a database by reading keys, sorting them and outputing - them in sorted order through SORT_INFO functions. + them in sorted order through MI_SORT_INFO functions. */ #include "fulltext.h" @@ -459,8 +459,8 @@ ok: int thr_write_keys(MI_SORT_PARAM *sort_param) { - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; ulong length, keys; ulong *rec_per_key_part=param->rec_per_key_part; int got_error=sort_info->got_error; @@ -870,7 +870,6 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, BUFFPEK *buffpek,**refpek; QUEUE queue; volatile int *killed= killed_ptr(info->sort_info->param); - DBUG_ENTER("merge_buffers"); count=error=0; From a63446fe798735505020e96eecf5b8f1c478292f Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 11 Apr 2006 20:00:57 +0300 Subject: [PATCH 03/95] Add ma_test_all.res storage/maria/ma_test_all.res: New BitKeeper file ``storage/maria/ma_test_all.res'' --- storage/maria/ma_test_all.res | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 storage/maria/ma_test_all.res diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res new file mode 100644 index 00000000000..7ffd3378b51 --- /dev/null +++ b/storage/maria/ma_test_all.res @@ -0,0 +1,53 @@ +maria_chk: MARIA file test1 +maria_chk: warning: Size of indexfile is: 8192 Should be: 16384 +MARIA-table 'test1' is usable but should be fixed +ma_test2 -s -L -K -R1 -m2000 ; Should give error 135 +Error: 135 in write at record: 1105 +got error: 135 when using MARIA-database +maria_chk: MARIA file test2 +maria_chk: warning: Datafile is almost full, 65532 of 65534 used +MARIA-table 'test2' is usable but should be fixed +Commands Used count Errors Recover errors +open 1 0 0 +write 50 0 0 +update 5 0 0 +delete 50 0 0 +close 1 0 0 +extra 6 0 0 +Total 113 0 0 +Commands Used count Errors Recover errors +open 2 0 0 +write 100 0 0 +update 10 0 0 +delete 100 0 0 +close 2 0 0 +extra 12 0 0 +Total 226 0 0 + +real 0m0.994s +user 0m0.432s +sys 0m0.184s + +real 0m2.153s +user 0m1.196s +sys 0m0.228s + +real 0m1.483s +user 0m0.772s +sys 0m0.180s + +real 0m1.992s +user 0m1.180s +sys 0m0.188s + +real 0m2.028s +user 0m1.184s +sys 0m0.152s + +real 0m1.878s +user 0m1.028s +sys 0m0.136s + +real 0m1.980s +user 0m1.116s +sys 0m0.192s From 379b95780824f3e7d915f6d8dd7a976e924e5723 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 12 Apr 2006 15:36:53 +0200 Subject: [PATCH 04/95] easy fixes to ma_test_all (to make shell happy) storage/maria/ma_test_all.sh: ; is needed before "else". Some ./ for executables to be found. --- storage/maria/ma_test_all.sh | 256 +++++++++++++++++------------------ 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index d848bc63b9a..f857127dca9 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -6,142 +6,142 @@ valgrind="valgrind --alignment=8 --leak-check=yes" silent="-s" -if test -f ma_test1$MACH ; then suffix=$MACH else suffix=""; fi -ma_test1$suffix $silent -maria_chk$suffix -se test1 -ma_test1$suffix $silent -N -S -maria_chk$suffix -se test1 -ma_test1$suffix $silent -P --checksum -maria_chk$suffix -se test1 -ma_test1$suffix $silent -P -N -S -maria_chk$suffix -se test1 -ma_test1$suffix $silent -B -N -R2 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -k 480 --unique -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -N -S -R1 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -S -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -S -N --unique -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -S -N --key_length=127 --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -S -N --key_length=128 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -S --key_length=480 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -B -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -B --key_length=64 --unique -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -B -k 480 --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -m -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -m -P --unique --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -m -p -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -w -S --unique -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -w --key_length=64 --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -w -N --key_length=480 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -w -S --key_length=480 --checksum -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -b -N -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -a -b --key_length=480 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent -p -B --key_length=480 -maria_chk$suffix -sm test1 +if test -f ma_test1$MACH ; then suffix=$MACH ; else suffix=""; fi +./ma_test1$suffix $silent +./maria_chk$suffix -se test1 +./ma_test1$suffix $silent -N -S +./maria_chk$suffix -se test1 +./ma_test1$suffix $silent -P --checksum +./maria_chk$suffix -se test1 +./ma_test1$suffix $silent -P -N -S +./maria_chk$suffix -se test1 +./ma_test1$suffix $silent -B -N -R2 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -k 480 --unique +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -N -S -R1 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -S +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -S -N --unique +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -S -N --key_length=127 --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -S -N --key_length=128 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -S --key_length=480 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -B +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -B --key_length=64 --unique +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -B -k 480 --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -m +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -m -P --unique --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -m -p +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -w -S --unique +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -w --key_length=64 --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -w -N --key_length=480 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -w -S --key_length=480 --checksum +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -b -N +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -a -b --key_length=480 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent -p -B --key_length=480 +./maria_chk$suffix -sm test1 -ma_test1$suffix $silent --checksum -maria_chk$suffix -se test1 -maria_chk$suffix -rs test1 -maria_chk$suffix -se test1 -maria_chk$suffix -rqs test1 -maria_chk$suffix -se test1 -maria_chk$suffix -rs --correct-checksum test1 -maria_chk$suffix -se test1 -maria_chk$suffix -rqs --correct-checksum test1 -maria_chk$suffix -se test1 -maria_chk$suffix -ros --correct-checksum test1 -maria_chk$suffix -se test1 -maria_chk$suffix -rqos --correct-checksum test1 -maria_chk$suffix -se test1 +./ma_test1$suffix $silent --checksum +./maria_chk$suffix -se test1 +./maria_chk$suffix -rs test1 +./maria_chk$suffix -se test1 +./maria_chk$suffix -rqs test1 +./maria_chk$suffix -se test1 +./maria_chk$suffix -rs --correct-checksum test1 +./maria_chk$suffix -se test1 +./maria_chk$suffix -rqs --correct-checksum test1 +./maria_chk$suffix -se test1 +./maria_chk$suffix -ros --correct-checksum test1 +./maria_chk$suffix -se test1 +./maria_chk$suffix -rqos --correct-checksum test1 +./maria_chk$suffix -se test1 # check of maria_pack / maria_chk -maria_pack$suffix --force -s test1 -maria_chk$suffix -es test1 -maria_chk$suffix -rqs test1 -maria_chk$suffix -es test1 -maria_chk$suffix -rs test1 -maria_chk$suffix -es test1 -maria_chk$suffix -rus test1 -maria_chk$suffix -es test1 +./maria_pack$suffix --force -s test1 +./maria_chk$suffix -es test1 +./maria_chk$suffix -rqs test1 +./maria_chk$suffix -es test1 +./maria_chk$suffix -rs test1 +./maria_chk$suffix -es test1 +./maria_chk$suffix -rus test1 +./maria_chk$suffix -es test1 -ma_test1$suffix $silent --checksum -S -maria_chk$suffix -se test1 -maria_chk$suffix -ros test1 -maria_chk$suffix -rqs test1 -maria_chk$suffix -se test1 +./ma_test1$suffix $silent --checksum -S +./maria_chk$suffix -se test1 +./maria_chk$suffix -ros test1 +./maria_chk$suffix -rqs test1 +./maria_chk$suffix -se test1 -maria_pack$suffix --force -s test1 -maria_chk$suffix -rqs test1 -maria_chk$suffix -es test1 -maria_chk$suffix -rus test1 -maria_chk$suffix -es test1 +./maria_pack$suffix --force -s test1 +./maria_chk$suffix -rqs test1 +./maria_chk$suffix -es test1 +./maria_chk$suffix -rus test1 +./maria_chk$suffix -es test1 -ma_test1$suffix $silent --checksum --unique -maria_chk$suffix -se test1 -ma_test1$suffix $silent --unique -S -maria_chk$suffix -se test1 +./ma_test1$suffix $silent --checksum --unique +./maria_chk$suffix -se test1 +./ma_test1$suffix $silent --unique -S +./maria_chk$suffix -se test1 -ma_test1$suffix $silent --key_multiple -N -S -maria_chk$suffix -sm test1 -ma_test1$suffix $silent --key_multiple -a -p --key_length=480 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent --key_multiple -a -B --key_length=480 -maria_chk$suffix -sm test1 -ma_test1$suffix $silent --key_multiple -P -S -maria_chk$suffix -sm test1 +./ma_test1$suffix $silent --key_multiple -N -S +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent --key_multiple -a -p --key_length=480 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent --key_multiple -a -B --key_length=480 +./maria_chk$suffix -sm test1 +./ma_test1$suffix $silent --key_multiple -P -S +./maria_chk$suffix -sm test1 -ma_test2$suffix $silent -L -K -W -P -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -K -W -P -A -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 +./ma_test2$suffix $silent -L -K -W -P +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -L -K -W -P -A +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -K -R1 -m2000 -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -K -P -S -R3 -m50 -b1000000 -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -B -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -D -B -c -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -m10000 -e8192 -K -maria_chk$suffix -sm test2 -ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L -maria_chk$suffix -sm test2 +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -L -K -R1 -m2000 +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -L -K -P -S -R3 -m50 -b1000000 +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -L -B +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -D -B -c +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -m10000 -e8192 -K +./maria_chk$suffix -sm test2 +./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L +./maria_chk$suffix -sm test2 -ma_test2$suffix $silent -L -K -W -P -m50 -l -maria_log$suffix -ma_test2$suffix $silent -L -K -W -P -m50 -l -b100 -maria_log$suffix -time ma_test2$suffix $silent -time ma_test2$suffix $silent -K -B -time ma_test2$suffix $silent -L -B -time ma_test2$suffix $silent -L -K -B -time ma_test2$suffix $silent -L -K -W -B -time ma_test2$suffix $silent -L -K -W -S -B -time ma_test2$suffix $silent -D -K -W -S -B +./ma_test2$suffix $silent -L -K -W -P -m50 -l +./maria_log$suffix +./ma_test2$suffix $silent -L -K -W -P -m50 -l -b100 +./maria_log$suffix +time ./ma_test2$suffix $silent +time ./ma_test2$suffix $silent -K -B +time ./ma_test2$suffix $silent -L -B +time ./ma_test2$suffix $silent -L -K -B +time ./ma_test2$suffix $silent -L -K -W -B +time ./ma_test2$suffix $silent -L -K -W -S -B +time ./ma_test2$suffix $silent -D -K -W -S -B From 99a86a34bbff7cdc01d1ee47d461227304fc8d51 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 12 Apr 2006 15:40:50 +0200 Subject: [PATCH 05/95] Exact same change as the one I just pushed in 5.1-new: 10 MB is enough for most cases. mysql-test/mysql-test-run.pl: 10 instead of 128 --- mysql-test/mysql-test-run.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 6cec64e7120..918c7457677 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2566,7 +2566,7 @@ sub mysqld_arguments ($$$$$$) { mtr_add_arg($args, "%s--server-id=%d", $prefix, $id); mtr_add_arg($args, "%s--socket=%s", $prefix, $master->[$idx]->{'path_mysock'}); - mtr_add_arg($args, "%s--innodb_data_file_path=ibdata1:128M:autoextend", $prefix); + mtr_add_arg($args, "%s--innodb_data_file_path=ibdata1:10M:autoextend", $prefix); mtr_add_arg($args, "%s--local-infile", $prefix); mtr_add_arg($args, "%s--datadir=%s", $prefix, $master->[$idx]->{'path_myddir'}); From 06f7675b957fa68c69ae37c31b457032921f838e Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Apr 2006 16:06:46 +0200 Subject: [PATCH 06/95] Maria: first version of checkpoint (WL#3071), least-recently-dirtied page flushing (WL#3261), recovery (WL#3072), control file (WL#3234), to serve as a detailed LLD. It looks like C code, but does not compile (no point in making it compile, as other modules on which I depend are not yet fully speficied or written); some pieces are not coded and just marked in comments. Files' organization (names, directories of C files) does not matter at this point. I don't think I had to commit so early, but it feels good to publish something, gives me the impression of moving forward :) storage/maria/checkpoint.c: WL#3071 Maria checkpoint, implementation storage/maria/checkpoint.h: WL#3071 Maria checkpoint, interface storage/maria/control_file.c: WL#3234 Maria control file, implementation storage/maria/control_file.h: WL#3234 Maria control file, interface storage/maria/least_recently_dirtied.c: WL#3261 Maria background flushing of least-recently-dirtied pages, implementation storage/maria/least_recently_dirtied.h: WL#3261 Maria background flushing of least-recently-dirtied pages, interface storage/maria/recovery.c: WL#3072 Maria recovery, implementation storage/maria/recovery.h: WL#3072 Maria recovery, interface --- storage/maria/checkpoint.c | 394 +++++++++++++++++++++++++ storage/maria/checkpoint.h | 23 ++ storage/maria/control_file.c | 77 +++++ storage/maria/control_file.h | 24 ++ storage/maria/least_recently_dirtied.c | 175 +++++++++++ storage/maria/least_recently_dirtied.h | 10 + storage/maria/recovery.c | 224 ++++++++++++++ storage/maria/recovery.h | 10 + 8 files changed, 937 insertions(+) create mode 100644 storage/maria/checkpoint.c create mode 100644 storage/maria/checkpoint.h create mode 100644 storage/maria/control_file.c create mode 100644 storage/maria/control_file.h create mode 100644 storage/maria/least_recently_dirtied.c create mode 100644 storage/maria/least_recently_dirtied.h create mode 100644 storage/maria/recovery.c create mode 100644 storage/maria/recovery.h diff --git a/storage/maria/checkpoint.c b/storage/maria/checkpoint.c new file mode 100644 index 00000000000..af37377455e --- /dev/null +++ b/storage/maria/checkpoint.c @@ -0,0 +1,394 @@ +/* + WL#3071 Maria checkpoint + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +/* + this transaction is used for any system work (purge, checkpoint writing + etc), that is, background threads. It will not be declared/initialized here + in the final version. +*/ +st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,...}; + +/* + The maximum rec_lsn in the LRD when last checkpoint was run, serves for the + MEDIUM checkpoint. +*/ +LSN max_rec_lsn_at_last_checkpoint= 0; + +/* Picks a checkpoint request and executes it */ +my_bool checkpoint() +{ + CHECKPOINT_LEVEL level; + DBUG_ENTER("checkpoint"); + + level= checkpoint_running= checkpoint_request; + unlock(log_mutex); + + DBUG_ASSERT(level != NONE); + + switch (level) + { + case FULL: + /* flush all pages up to the current end of the LRD */ + flush_all_LRD_to_lsn(MAX_LSN); /* MAX_LSN==ULONGLONG_MAX */ + /* this will go full speed (normal scheduling, no sleep) */ + break; + case MEDIUM: + /* + flush all pages which were already dirty at last checkpoint: + ensures that recovery will never start from before the next-to-last + checkpoint (two-checkpoint rule). + It is max, not min as the WL says (TODO update WL). + */ + flush_all_LRD_to_lsn(max_rec_lsn_at_last_checkpoint); + /* this will go full speed (normal scheduling, no sleep) */ + break; + } + + error= checkpoint_indirect(); + + lock(log_mutex); + /* + this portion cannot be done as a hook in write_log_record() for the + LOGREC_CHECKPOINT type because: + - at that moment we still have not written to the control file so cannot + mark the request as done; this could be solved by writing to the control + file in the hook but that would be an I/O under the log's mutex, bad. + - it would not be nice organisation of code (I tried it :). + */ + mark_checkpoint_done(error); + unlock(log_mutex); + DBUG_RETURN(error); +} + + +my_bool checkpoint_indirect() +{ + DBUG_ENTER("checkpoint_indirect"); + + int error= 0; + /* checkpoint record data: */ + LSN checkpoint_start_lsn; + LEX_STRING string1={0,0}, string2={0,0}, string3={0,0}; + LEX_STRING *string_array[4]; + char *ptr; + LSN checkpoint_lsn; + LSN candidate_max_rec_lsn_at_last_checkpoint= 0; + list_element *el; /* to scan lists */ + + + DBUG_ASSERT(sizeof(byte *) <= 8); + DBUG_ASSERT(sizeof(LSN) <= 8); + + lock(log_mutex); /* will probably be in log_read_end_lsn() already */ + checkpoint_start_lsn= log_read_end_lsn(); + unlock(log_mutex); + + DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); + + lock(global_LRD_mutex); + string1.length= 8+8+(8+8)*LRD->count; + if (NULL == (string1.str= my_malloc(string1.length))) + goto err; + ptr= string1.str; + int8store(ptr, checkpoint_start_lsn); + ptr+= 8; + int8store(ptr, LRD->count); + ptr+= 8; + if (LRD->count) + { + candidate_max_rec_lsn_at_last_checkpoint= LRD->last->rec_lsn; + for (el= LRD->first; el; el= el->next) + { + int8store(ptr, el->page_id); + ptr+= 8; + int8store(ptr, el->rec_lsn); + ptr+= 8; + } + } + unlock(global_LRD_mutex); + + /* + If trx are in more than one list (e.g. three: + running transactions, committed transactions, purge queue), we can either + take mutexes of all three together or do crabbing. + But if an element can move from list 1 to list 3 without passing through + list 2, crabbing is dangerous. + Hopefully it's ok to take 3 mutexes together... + Otherwise I'll have to make sure I miss no important trx and I handle dups. + */ + lock(global_transactions_list_mutex); /* or 3 mutexes if there are 3 */ + string2.length= 8+(8+8)*trx_list->count; + if (NULL == (string2.str= my_malloc(string2.length))) + goto err; + ptr= string2.str; + int8store(ptr, trx_list->count); + ptr+= 8; + for (el= trx_list->first; el; el= el->next) + { + /* possibly latch el.rwlock */ + *ptr= el->state; + ptr++; + int7store(ptr, el->long_trans_id); + ptr+= 7; + int2store(ptr, el->short_trans_id); + ptr+= 2; + int8store(ptr, el->undo_lsn); + ptr+= 8; + int8store(ptr, el->undo_purge_lsn); + ptr+= 8; + /* + if no latch, use double variable of type ULONGLONG_CONSISTENT in + st_transaction, or even no need if Intel >=486 + */ + int8store(ptr, el->first_purge_lsn); + ptr+= 8; + /* possibly unlatch el.rwlock */ + } + unlock(global_transactions_list_mutex); + + lock(global_share_list_mutex); + string3.length= 8+(8+8)*share_list->count; + if (NULL == (string3.str= my_malloc(string3.length))) + goto err; + ptr= string3.str; + /* possibly latch each MARIA_SHARE */ + make_copy_of_global_share_list_to_array; + unlock(global_share_list_mutex); + + /* work on copy */ + int8store(ptr, elements_in_array); + ptr+= 8; + for (scan_array) + { + int8store(ptr, array[...].file_id); + ptr+= 8; + memcpy(ptr, array[...].file_name, ...); + ptr+= ...; + /* + these two are long ops (involving disk I/O) that's why we copied the + list: + */ + flush_bitmap_pages(el); + /* + fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per + second, so if you have touched 1000 files it's 7 seconds). + */ + force_file(el); + } + + /* now write the record */ + string_array[0]= string1; + string_array[1]= string2; + string_array[2]= string3; + string_array[3]= NULL; + + checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, + &system_trans, string_array); + + if (0 == checkpoint_lsn) /* maybe 0 is impossible LSN to indicate error ? */ + goto err; + + if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) + goto err; + + maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; + + DBUG_RETURN(0); + +err: + + print_error_to_error_log(the_error_message); + my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR)); + my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); + my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); + + DBUG_RETURN(1); +} + + + +/* + Here's what should be put in log_write_record() in the log handler: +*/ +log_write_record(...) +{ + ...; + lock(log_mutex); + ...; + write_to_log(length); + written_since_last_checkpoint+= length; + if (written_since_last_checkpoint > + MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS) + { + /* + ask one system thread (the "LRD background flusher and checkpointer + thread" WL#3261) to do a checkpoint + */ + request_checkpoint(INDIRECT, 0 /*wait_for_completion*/); + } + ...; + unlock(log_mutex); + ...; +} + +/* + Call this when you want to request a checkpoint. + In real life it will be called by log_write_record() and by client thread + which explicitely wants to do checkpoint (ALTER ENGINE CHECKPOINT + checkpoint_level). +*/ +int request_checkpoint(CHECKPOINT_LEVEL level, my_bool wait_for_completion) +{ + int error= 0; + /* + If caller wants to wait for completion we'll have to release the log mutex + to wait on condition, if caller had log mutex he may not be happy that we + release it, so we check that caller didn't have log mutex. + */ + if (wait_for_completion) + { + lock(log_mutex); + } + else + safemutex_assert_owner(log_mutex); + + DBUG_ASSERT(checkpoint_request >= checkpoint_running); + DBUG_ASSERT(level > NONE); + if (checkpoint_request < level) + { + /* no equal or stronger running or to run, we post request */ + /* + note that thousands of requests for checkpoints are going to come all + at the same time (when the log bound is passed), so it may not be a good + idea for each of them to broadcast a cond. We just don't broacast a + cond, the checkpoint thread will wake up in max one second. + */ + checkpoint_request= level; /* post request */ + } + + if (wait_for_completion) + { + uint checkpoints_done_copy= checkpoints_done; + uint checkpoint_errors_copy= checkpoint_errors; + /* + note that the "==done" works when the uint counter wraps too, so counter + can even be smaller than uint if we wanted (however it should be big + enough so that max_the_int_type checkpoints cannot happen between two + wakeups of our thread below). uint sounds fine. + Wait for our checkpoint to be done: + */ + + if (checkpoint_running != NONE) /* not ours, let it pass */ + { + while (1) + { + if (checkpoints_done != checkpoints_done_copy) + { + if (checkpoints_done == (checkpoints_done_copy+1)) + { + /* not our checkpoint, forget about it */ + checkpoints_done_copy= checkpoints_done; + } + break; /* maybe even ours has been done at this stage! */ + } + cond_wait(checkpoint_done_cond, log_mutex); + } + } + + /* now we come to waiting for our checkpoint */ + while (1) + { + if (checkpoints_done != checkpoints_done_copy) + { + /* our checkpoint has been done */ + break; + } + if (checkpoint_errors != checkpoint_errors_copy) + { + /* + the one which was running a few milliseconds ago (if there was one), + and/or ours, had an error, just assume it was ours. So there + is a possibility that we return error though we succeeded, in which + case user will have to retry; but two simultanate checkpoints have + high changes to fail together (as the error probably comes from + malloc or disk write problem), so chance of false alarm is low. + Reporting the error only to the one which caused the error would + require having a (not fixed size) list of all requests, not worth it. + */ + error= 1; + break; + } + cond_wait(checkpoint_done_cond, log_mutex); + } + unlock(log_mutex); + } /* ... if (wait_for_completion) */ + + /* + If wait_for_completion was false, and there was an error, only an error + message to the error log will say it; normal, for a checkpoint triggered + by a log write, we probably don't want the client's log write to throw an + error, as the log write succeeded and a checkpoint failure is not + critical: the failure in this case is more for the DBA to know than for + the end user. + */ + return error; +} + +void mark_checkpoint_done(int error) +{ + safemutex_assert_owner(log_mutex); + if (error) + checkpoint_errors++; + /* a checkpoint is said done even if it had an error */ + checkpoints_done++; + if (checkpoint_request == checkpoint_running) + { + /* + No new request has been posted, so we satisfied all requests, forget + about them. + */ + checkpoint_request= NONE; + } + checkpoint_running= NONE; + written_since_last_checkpoint= 0; + broadcast(checkpoint_done_cond); +} + +/* + Alternative (not to be done, too disturbing): + do the autocheckpoint in the thread which passed the bound first (and do the + checkpoint in the client thread which requested it). + It will give a delay to that client thread which passed the bound (time to + fsync() for example 1000 files is 16 s on my laptop). Here is code for + explicit and implicit checkpoints, where client thread does the job: +*/ +#if 0 +{ + lock(log_mutex); /* explicit takes it here, implicit already has it */ + while (checkpoint_running != NONE) + { + if (checkpoint_running >= my_level) /* always true for auto checkpoints */ + goto end; /* we skip checkpoint */ + /* a less strong is running, I'll go next */ + wait_on_checkpoint_done_cond(); + } + checkpoint_running= my_level; + checkpoint(my_level); // can gather checkpoint_start_lsn before unlock + lock(log_mutex); + checkpoint_running= NONE; + written_since_last_checkpoint= 0; +end: + unlock(log_mutex); +} +#endif diff --git a/storage/maria/checkpoint.h b/storage/maria/checkpoint.h new file mode 100644 index 00000000000..ce8066de93d --- /dev/null +++ b/storage/maria/checkpoint.h @@ -0,0 +1,23 @@ +/* + WL#3071 Maria checkpoint + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +typedef enum enum_checkpoint_level { + NONE=-1, + INDIRECT, /* just write dirty_pages, transactions table and sync files */ + MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint*/ + FULL /* also flush all dirty pages */ +} CHECKPOINT_LEVEL; + +/* + Call this when you want to request a checkpoint. + In real life it will be called by log_write_record() and by client thread + which explicitely wants to do checkpoint (ALTER ENGINE CHECKPOINT + checkpoint_level). +*/ +int request_checkpoint(CHECKPOINT_LEVEL level, my_bool wait_for_completion); +/* that's all that's needed in the interface */ diff --git a/storage/maria/control_file.c b/storage/maria/control_file.c new file mode 100644 index 00000000000..897e0b0f0ee --- /dev/null +++ b/storage/maria/control_file.c @@ -0,0 +1,77 @@ +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +/* Control file is 512 bytes (a disk sector), to be as atomic as possible */ + +int control_file_fd; + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +int control_file_create_or_open() +{ + char buffer[4]; + /* name is concatenation of Maria's home dir and "control" */ + if ((control_file_fd= my_open(name, O_RDWR)) < 0) + { + /* failure, try to create it */ + if ((control_file_fd= my_create(name, O_RDWR)) < 0) + return 1; + /* + So this is a start from scratch, to be safer we should make sure that + there are no logs or data/index files around (indeed it could be that + the control file alone was deleted or not restored, and we should not + go on with life at this point. + For now we trust (this is alpha version), but for beta if would be great + to verify. + + We could have a tool which can rebuild the control file, by reading the + directory of logs, finding the newest log, reading it to find last + checkpoint... Slow but can save your db. + */ + last_checkpoint_lsn_at_startup= 0; + last_log_name_at_startup= NULL; + return 0; + } + /* Already existing file, read it */ + if (my_read(control_file_fd, buffer, 8, MYF(MY_FNABP))) + return 1; + last_checkpoint_lsn_at_startup= uint8korr(buffer); + if (last_log_name_at_startup= my_malloc(512-8+1)) + return 1; + if (my_read(control_file_fd, last_log_name_at_startup, 512-8), MYF(MY_FNABP)) + return 1; + last_log_name[512-8]= 0; /* end zero to be nice */ + return 0; +} + +/* + Write information durably to the control file. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). +*/ +int control_file_write_and_force(LSN lsn, char *log_name) +{ + char buffer[512]; + uint start=8,end=8; + if (lsn != 0) /* LSN was specified */ + { + start= 0; + int8store(buffer, lsn); + } + if (log_name != NULL) /* log name was specified */ + { + end= 512; + memcpy(buffer+8, log_name, 512-8); + } + DBUG_ASSERT(start != end); + return (my_pwrite(control_file_fd, buffer, end-start, start, MYF(MY_FNABP)) || + my_sync(control_file_fd)) +} diff --git a/storage/maria/control_file.h b/storage/maria/control_file.h new file mode 100644 index 00000000000..d148ab7d88a --- /dev/null +++ b/storage/maria/control_file.h @@ -0,0 +1,24 @@ +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the interface of this module */ + +LSN last_checkpoint_lsn_at_startup; +char *last_log_name_at_startup; + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +int control_file_create_or_open(); + +/* + Write information durably to the control file. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). +*/ +int control_file_write_and_force(LSN lsn, char *log_name); diff --git a/storage/maria/least_recently_dirtied.c b/storage/maria/least_recently_dirtied.c new file mode 100644 index 00000000000..e251e3c582c --- /dev/null +++ b/storage/maria/least_recently_dirtied.c @@ -0,0 +1,175 @@ +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* + To be part of the page cache. + The pseudocode below is dependent on the page cache + which is being designed WL#3134. It is not clear if I need to do page + copies, as the page cache already keeps page copies. + So, this code will move to the page cache and take inspiration from its + methods. Below is just to give the idea of what could be done. + And I should compare my imaginations to WL#3134. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" + +/* + When we flush a page, we should pin page. + This "pin" is to protect against that: + I make copy, + you modify in memory and flush to disk and remove from LRD and from cache, + I write copy to disk, + checkpoint happens. + result: old page is on disk, page is absent from LRD, your REDO will be + wrongly ignored. + + Pin: there can be multiple pins, flushing imposes that there are zero pins. + For example, pin could be a uint counter protected by the page's latch. + + Maybe it's ok if when there is a page replacement, the replacer does not + remove page from the LRD (it would save global mutex); for that, background + flusher should be prepared to see pages in the LRD which are not in the page + cache (then just ignore them). However checkpoint will contain superfluous + entries and so do more work. +*/ + +#define PAGE_SIZE (16*1024) /* just as an example */ +/* + Optimization: + LRD flusher should not flush pages one by one: to be fast, it flushes a + group of pages in sequential disk order if possible; a group of pages is just + FLUSH_GROUP_SIZE pages. + Key cache has groupping already somehow Monty said (investigate that). +*/ +#define FLUSH_GROUP_SIZE 512 /* 8 MB */ + +/* + This thread does background flush of pieces of the LRD, and all checkpoints. + Just launch it when engine starts. +*/ +pthread_handler_decl background_flush_and_checkpoint_thread() +{ + char *flush_group_buffer= my_malloc(PAGE_SIZE*FLUSH_GROUP_SIZE); + while (this_thread_not_killed) + { + lock(log_mutex); + if (checkpoint_request) + checkpoint(); /* will unlock mutex */ + else + { + unlock(log_mutex); + lock(global_LRD_mutex); + flush_one_group_from_LRD(); + safemutex_assert_not_owner(global_LRD_mutex); + } + my_sleep(1000000); /* one second ? */ + } + my_free(flush_group_buffer); +} + +/* + flushes only the first FLUSH_GROUP_SIZE pages of the LRD. +*/ +flush_one_group_from_LRD() +{ + char *ptr; + safe_mutex_assert_owner(global_LRD_mutex); + + for (page= 0; pagedata, PAGE_SIZE); + pin_page; + page_cache_unlatch(page_id, KEEP_PINNED); /* but keep pinned */ + } + for (scan_the_array) + { + /* + As an optimization, we try to identify contiguous-in-the-file segments (to + issue one big write()). + In non-optimized version, contiguous segment is always only one page. + */ + if ((next_page.page_id - this_page.page_id) == 1) + { + /* + this page and next page are in same file and are contiguous in the + file: add page to contiguous segment... + */ + continue; /* defer write() to next pages */ + } + /* contiguous segment ends */ + my_pwrite(file, contiguous_segment_start_offset, contiguous_segment_size); + + /* + note that if we had doublewrite, doublewrite buffer may prevent us from + doing this write() grouping (if doublewrite space is shorter). + */ + } + /* + Now remove pages from LRD. As we have pinned them, all pages that we + managed to pin are still in the LRD, in the same order, we can just cut + the LRD at the last element of "array". This is more efficient that + removing element by element (which would take LRD mutex many times) in the + loop above. + */ + lock(global_LRD_mutex); + /* cut LRD by bending LRD->first, free cut portion... */ + unlock(global_LRD_mutex); + for (scan_array) + { + /* + if the page has a property "modified since last flush" (i.e. which is + redundant with the presence of the page in the LRD, this property can + just be a pointer to the LRD element) we should reset it + (note that then the property would live slightly longer than + the presence in LRD). + */ + page_cache_unpin(page_id); + /* + order between unpin and removal from LRD is not clear, depends on what + pin actually is. + */ + } + free(array); +} + +/* flushes all page from LRD up to approximately rec_lsn>=max_lsn */ +int flush_all_LRD_to_lsn(LSN max_lsn) +{ + lock(global_LRD_mutex); + if (max_lsn == MAX_LSN) /* don't want to flush forever, so make it fixed: */ + max_lsn= LRD->first->prev->rec_lsn; + while (LRD->first->rec_lsn < max_lsn) + { + if (flush_one_group_from_LRD()) /* will unlock mutex */ + return 1; + /* scheduler may preempt us here so that we don't take full CPU */ + lock(global_LRD_mutex); + } + unlock(global_LRD_mutex); + return 0; +} diff --git a/storage/maria/least_recently_dirtied.h b/storage/maria/least_recently_dirtied.h new file mode 100644 index 00000000000..6a30db4b5f0 --- /dev/null +++ b/storage/maria/least_recently_dirtied.h @@ -0,0 +1,10 @@ +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* flushes all page from LRD up to approximately rec_lsn>=max_lsn */ +int flush_all_LRD_to_lsn(LSN max_lsn); diff --git a/storage/maria/recovery.c b/storage/maria/recovery.c new file mode 100644 index 00000000000..aee8f1de749 --- /dev/null +++ b/storage/maria/recovery.c @@ -0,0 +1,224 @@ +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +typedef struct st_record_type_properties { + /* used for debug error messages or "maria_read_log" command-line tool: */ + char *name, + my_bool record_ends_group; + int (*record_execute)(RECORD *); /* param will be record header instead later */ +} RECORD_TYPE_PROPERTIES; + +RECORD_TYPE_PROPERTIES all_record_type_properties[]= +{ + /* listed here in the order of the "log records type" enumeration */ + {"REDO_INSERT_HEAD", 0, redo_insert_head_execute}, + ..., + {"UNDO_INSERT" , 1, undo_insert_execute }, + {"COMMIT", , 1, commit_execute }, + ... +}; + +int redo_insert_head_execute(RECORD *record) +{ + /* write the data to the proper page */ +} + +int undo_insert_execute(RECORD *record) +{ + trans_table[short_trans_id].undo_lsn= record.lsn; + /* restore the old version of the row */ +} + +int commit_execute(RECORD *record) +{ + trans_table[short_trans_id].state= COMMITTED; + /* + and that's all: the delete/update handler should not be woken up! as there + may be REDO for purge further in the log. + */ +} + +#define record_ends_group(R) \ + all_record_type_properties[(R)->type].record_ends_group) + +#define execute_log_record(R) \ + all_record_type_properties[(R).type].record_execute(R) + + +int recovery() +{ + control_file_create_or_open(); + /* + init log handler: tell it that we are going to do large reads of the + log, sequential and backward. Log handler could decide to alloc a big + read-only IO_CACHE for this, or use its usual page cache. + */ + + /* read checkpoint log record from log handler */ + RECORD *checkpoint_record= log_read_record(last_checkpoint_lsn_at_start); + + /* parse this record, build structs (dirty_pages, transactions table, file_map) */ + /* + read log records (note: sometimes only the header is needed, for ex during + REDO phase only the header of UNDO is needed, not the 4G blob in the + variable-length part, so I could use that; however for PREPARE (which is a + variable-length record) I'll need to read the full record in the REDO + phase): + */ + + record= log_read_record(min(rec_lsn, ...)); + /* + if log handler knows the end LSN of the log, we could print here how many + MB of log we have to read (to give an idea of the time), and print + progress notes. + */ + + while (record != NULL) + { + /* + A complete group is a set of log records with an "end mark" record + (e.g. a set of REDOs for an operation, terminated by an UNDO for this + operation); if there is no "end mark" record the group is incomplete + and won't be executed. + */ + if (record_ends_group(record) + { + /* + such end events can always be executed immediately (they don't touch + the disk). + */ + execute_log_record(record); + if (trans_table[record.short_trans_id].group_start_lsn != 0) + { + /* + There is a complete group for this transaction. + We're going to read recently read log records: + for this log_read_record() to be efficient (not touch the disk), + log handler could cache recently read pages + (can just use an IO_CACHE of 10 MB to read the log, or the normal + log handler page cache). + Without it only OS file cache will help. + */ + record2= log_read_record(trans_table[record.short_trans_id].group_start_lsn); + while (record2.lsn < record.lsn) + { + if (record2.short_trans_id == record.short_trans_id) + execute_log_record(record2); /* it's in our group */ + record2= log_read_next_record(); + } + trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */ + /* we're now at the UNDO, re-read it to advance log pointer */ + record2= log_read_next_record(); /* and throw it away */ + } + } + else /* record does not end group */ + { + /* just record the fact, can't know if can execute yet */ + if (trans_table[short_trans_id].group_start_lsn == 0) /* group not yet started */ + trans_table[short_trans_id].group_start_lsn= record.lsn; + } + + /* + Later we can optimize: instead of "execute_log_record(record2)", do + copy_record_into_exec_buffer(record2): + this will just copy record into a multi-record (10 MB?) memory buffer, + and when buffer is full, will do sorting of REDOs per + page id and execute them. + This sorting will enable us to do more sequential reads of the + data/index pages. + Note that updating bitmap pages (when we have executed a REDO for a page + we update its bitmap page) may break the sequential read of pages, + so maybe we should read and cache bitmap pages in the beginning. + Or ok the sequence will be broken, but quickly all bitmap pages will be + in memory and so the sequence will not be broken anymore. + Sorting could even determine, based on physical device of files + ("st_dev" in stat()), that some files should be should be taken by + different threads, if we want to do parallism. + */ + /* + Here's how to read a complete variable-length record if needed: + read the header, allocate buffer of record length, read whole + record. + */ + record= log_read_next_record(); + } + + /* + Earlier or here, create true transactions in TM. + If done earlier, note that TM should not wake up the delete/update handler + when it receives a commit info, as existing REDO for purge may exist in + the log, and so the delete/update handler may do changes which conflict + with these REDOs. + Even if done here, better to not wake it up now as we're going to free the + page cache: + */ + + /* + We want to have two steps: + engine->recover_with_max_memory(); + next_engine->recover_with_max_memory(); + engine->init_with_normal_memory(); + next_engine->init_with_normal_memory(); + So: in recover_with_max_memory() allocate a giant page cache, do REDO + phase, then all page cache is flushed and emptied and freed (only retain + small structures like TM): take full checkpoint, which is useful if + next engine crashes in its recovery the next second. + Destroy all shares (maria_close()), then at init_with_normal_memory() we + do this: + */ + + print_information_to_error_log(nb of trans to roll back, nb of prepared trans); + + /* + Launch one or more threads to do the background rollback. Don't wait for + them to complete their rollback (background rollback; for debugging, we + can have an option which waits). + + Note that InnoDB's rollback-in-background works as long as InnoDB is the + last engine to recover, otherwise MySQL will refuse new connections until + the last engine has recovered so it's not "background" from the user's + point of view. InnoDB is near top of sys_table_types so all others + (e.g. BDB) recover after it... So it's really "online rollback" only if + InnoDB is the only engine. + */ + + /* wake up delete/update handler */ + /* tell the TM that it can now accept new transactions */ + + /* + mark that checkpoint requests are now allowed. + */ + /* + when all rollback threads have terminated, somebody should print "rollback + finished" to the error log. + */ +} + +pthread_handler_decl rollback_background_thread() +{ + /* + execute the normal runtime-rollback code for a bunch of transactions. + */ + while (trans in list_of_trans_to_rollback_by_this_thread) + { + while (trans->undo_lsn != 0) + { + /* this is the normal runtime-rollback code: */ + record= log_read_record(trans->undo_lsn); + execute_log_record(record); + trans->undo_lsn= record.prev_undo_lsn; + } + /* remove trans from list */ + } +} diff --git a/storage/maria/recovery.h b/storage/maria/recovery.h new file mode 100644 index 00000000000..b85ffdeef59 --- /dev/null +++ b/storage/maria/recovery.h @@ -0,0 +1,10 @@ +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* Performs recovery of the engine at start */ +int recovery(); From 34dba480548e7cc9647d8c943beb92d161ec0281 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 4 May 2006 10:59:19 +0200 Subject: [PATCH 07/95] WL#3072 Maria recovery, small fixes to pseudocode, discussion why no checkpoint at end of UNDO phase. storage/maria/control_file.h: small fix storage/maria/recovery.c: small fixes to the pseudocode and discussion of why no checkpoint at end of UNDO phase. --- storage/maria/control_file.h | 4 ++-- storage/maria/recovery.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/storage/maria/control_file.h b/storage/maria/control_file.h index d148ab7d88a..522e7565341 100644 --- a/storage/maria/control_file.h +++ b/storage/maria/control_file.h @@ -6,8 +6,8 @@ /* Here is the interface of this module */ -LSN last_checkpoint_lsn_at_startup; -char *last_log_name_at_startup; +extern LSN last_checkpoint_lsn_at_startup; +extern char *last_log_name_at_startup; /* Looks for the control file. If absent, it's a fresh start, create file. diff --git a/storage/maria/recovery.c b/storage/maria/recovery.c index aee8f1de749..5af7019e5fb 100644 --- a/storage/maria/recovery.c +++ b/storage/maria/recovery.c @@ -183,7 +183,8 @@ int recovery() /* Launch one or more threads to do the background rollback. Don't wait for them to complete their rollback (background rollback; for debugging, we - can have an option which waits). + can have an option which waits). Set a counter (total_of_rollback_threads) + to the number of threads to lauch. Note that InnoDB's rollback-in-background works as long as InnoDB is the last engine to recover, otherwise MySQL will refuse new connections until @@ -221,4 +222,20 @@ pthread_handler_decl rollback_background_thread() } /* remove trans from list */ } + lock_mutex(rollback_threads); /* or atomic counter */ + if (--total_of_rollback_threads == 0) + { + /* + All rollback threads are done. Print "rollback finished" to the error + log. The UNDO phase has the reputation of being a slow operation + (slower than the REDO phase), so taking a checkpoint at the end of it is + intelligent, but as this UNDO phase generates REDOs and CLR_ENDs, if it + did a lot of work then the "automatic checkpoint when much has been + written to the log" will do it; and if the UNDO phase didn't do a lot of + work, no need for a checkpoint. If we change our mind and want to force + a checkpoint at the end of the UNDO phase, simply call it here. + */ + } + unlock_mutex(rollback_threads); + pthread_exit(); } From 157002b12f4134eb6ca2b84ba95558e3b6638929 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 5 May 2006 20:32:02 +0200 Subject: [PATCH 08/95] WL#3270 "Maria - cleanups of inherited MyISAM functionality" Removing the "external lock" functionality from Maria (as two separate processes wanting to share a table should not only my_lock() the data and index files but also the log files, and share memory (as the latest data is in the page cache), it sounds useless to feature this). Removing the MyISAM logging from Maria (as REDO logging will be done differently). BitKeeper/deleted/.del-maria_log.c~1fb295a18c3f5d4c: Delete: storage/maria/maria_log.c BitKeeper/deleted/.del-ma_log.c~4a44ec11d547772f: Delete: storage/maria/ma_log.c include/maria.h: unneeded storage/maria/Makefile.am: log removed storage/maria/ma_check.c: external locking removed storage/maria/ma_close.c: log removed storage/maria/ma_delete.c: log removed storage/maria/ma_delete_all.c: log removed. Unused var. storage/maria/ma_dynrec.c: external locking removed storage/maria/ma_extra.c: log removed storage/maria/ma_init.c: log removed storage/maria/ma_locking.c: external locking removed, log removed storage/maria/ma_open.c: external locking removed, log removed storage/maria/ma_static.c: log removed storage/maria/ma_statrec.c: external locking removed storage/maria/ma_test2.c: log removed storage/maria/ma_test3.c: log removed storage/maria/ma_update.c: log removed storage/maria/ma_write.c: external locking removed, log removed storage/maria/maria_chk.c: external locking removed storage/maria/maria_def.h: log removed, maria_pid unused. storage/maria/maria_pack.c: fixes for warnings (where pointers are like ulong and so %u is not enough). --- include/maria.h | 2 - storage/maria/Makefile.am | 5 +- storage/maria/ma_check.c | 20 +- storage/maria/ma_close.c | 1 - storage/maria/ma_delete.c | 2 - storage/maria/ma_delete_all.c | 2 - storage/maria/ma_dynrec.c | 6 - storage/maria/ma_extra.c | 1 - storage/maria/ma_init.c | 1 - storage/maria/ma_locking.c | 70 +-- storage/maria/ma_log.c | 164 ------- storage/maria/ma_open.c | 24 +- storage/maria/ma_static.c | 2 - storage/maria/ma_statrec.c | 3 - storage/maria/ma_test2.c | 7 +- storage/maria/ma_test3.c | 11 +- storage/maria/ma_update.c | 2 - storage/maria/ma_write.c | 7 - storage/maria/maria_chk.c | 1 - storage/maria/maria_def.h | 20 - storage/maria/maria_log.c | 848 ---------------------------------- storage/maria/maria_pack.c | 10 +- 22 files changed, 13 insertions(+), 1196 deletions(-) delete mode 100644 storage/maria/ma_log.c delete mode 100644 storage/maria/maria_log.c diff --git a/include/maria.h b/include/maria.h index c7586143fe7..72fb1cf2dff 100644 --- a/include/maria.h +++ b/include/maria.h @@ -372,8 +372,6 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, const char *name, int rep_quick); int maria_change_to_newfile(const char *filename, const char *old_ext, const char *new_ext, uint raid_chunks, myf myflags); -int maria_lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, - const char *filetype, const char *filename); void maria_lock_memory(HA_CHECK *param); int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update); void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index bf22428c18f..4132a4208a2 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -24,9 +24,8 @@ LDADD = @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ pkglib_LIBRARIES = libmaria.a -bin_PROGRAMS = maria_chk maria_log maria_pack maria_ftdump +bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) -maria_log_DEPENDENCIES= $(LIBRARIES) maria_pack_DEPENDENCIES=$(LIBRARIES) noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h @@ -47,7 +46,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_delete.c \ ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \ ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\ - ma_range.c ma_dbug.c ma_checksum.c ma_log.c \ + ma_range.c ma_dbug.c ma_checksum.c \ ma_changed.c ma_static.c ma_delete_all.c \ ma_delete_table.c ma_rename.c ma_check.c \ ma_keycache.c ma_preload.c ma_ft_parser.c \ diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 4680caed2da..4af0f955b8b 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -1928,25 +1928,7 @@ int maria_change_to_newfile(const char * filename, const char * old_ext, } /* maria_change_to_newfile */ - /* Locks a whole file */ - /* Gives an error-message if file can't be locked */ - -int maria_lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, - const char *filetype, const char *filename) -{ - if (my_lock(file,lock_type,start,F_TO_EOF, - param->testflag & T_WAIT_FOREVER ? MYF(MY_SEEK_NOT_DONE) : - MYF(MY_SEEK_NOT_DONE | MY_DONT_WAIT))) - { - _ma_check_print_error(param," %d when locking %s '%s'",my_errno,filetype,filename); - param->error_printed=2; /* Don't give that data is crashed */ - return 1; - } - return 0; -} /* maria_lock_file */ - - - /* Copy a block between two files */ +/* Copy a block between two files */ int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type) diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index f3a1b2ba261..5b940eaf4c3 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -113,7 +113,6 @@ int maria_close(register MARIA_HA *info) if (info->dfile >= 0 && my_close(info->dfile,MYF(0))) error = my_errno; - maria_log_command(MARIA_LOG_CLOSE,info,NULL,0,error); my_free((gptr) info,MYF(0)); if (error) diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 9e06b633171..d1ad9edbed5 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -101,7 +101,6 @@ int maria_delete(MARIA_HA *info,const byte *record) info->state->records--; mi_sizestore(lastpos,info->lastpos); - maria_log_command(MARIA_LOG_DELETE,info,(byte*) lastpos,sizeof(lastpos),0); VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) @@ -115,7 +114,6 @@ int maria_delete(MARIA_HA *info,const byte *record) err: save_errno=my_errno; mi_sizestore(lastpos,info->lastpos); - maria_log_command(MARIA_LOG_DELETE,info,(byte*) lastpos, sizeof(lastpos),0); if (save_errno != HA_ERR_RECORD_CHANGED) { maria_print_error(info->s, HA_ERR_CRASHED); diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index d71e4d7dce7..b16d82ed9f7 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -22,7 +22,6 @@ int maria_delete_all_rows(MARIA_HA *info) { uint i; - char buf[22]; MARIA_SHARE *share=info->s; MARIA_STATE_INFO *state=&share->state; DBUG_ENTER("maria_delete_all_rows"); @@ -49,7 +48,6 @@ int maria_delete_all_rows(MARIA_HA *info) for (i=0 ; i < share->base.keys ; i++) state->key_root[i]= HA_OFFSET_ERROR; - maria_log_command(MARIA_LOG_DELETE_ALL,info,(byte*) 0,0,0); /* If we are using delayed keys or if the user has done changes to the tables since it was locked then there may be key blocks in the key cache diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 0fbf28b949a..18efc0adbd0 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1529,12 +1529,6 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, if (info->lock_type == F_UNLCK) { #ifndef UNSAFE_LOCKING - if (share->tot_locks == 0) - { - if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, - MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) - DBUG_RETURN(my_errno); - } #else info->tmp_lock_type=F_RDLCK; #endif diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index bd5d4280c9d..06a36cba238 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -397,7 +397,6 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg { char tmp[1]; tmp[0]=function; - maria_log_command(MARIA_LOG_EXTRA,info,(byte*) tmp,1,error); } DBUG_RETURN(error); } /* maria_extra */ diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index fc526c6ca3a..318bbe341e4 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -52,7 +52,6 @@ void maria_end(void) if (maria_inited) { maria_inited= 0; - VOID(maria_logging(0)); /* Close log if neaded */ ft_free_stopwords(); pthread_mutex_destroy(&THR_LOCK_maria); } diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 697747021f8..adb4b03bebe 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -30,7 +30,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) int error; uint count; MARIA_SHARE *share=info->s; - uint flag; DBUG_ENTER("maria_lock_database"); DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " "global_changed: %d open_count: %u name: '%s'", @@ -49,7 +48,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) DBUG_RETURN(0); } - flag=error=0; + error=0; pthread_mutex_lock(&share->intern_lock); if (share->kfile >= 0) /* May only be false on windows */ { @@ -117,23 +116,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) maria_mark_crashed(info); } } - if (info->lock_type != F_EXTRA_LCK) - { - if (share->r_locks) - { /* Only read locks left */ - flag=1; - if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, - MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) - error=my_errno; - } - else if (!share->w_locks) - { /* No more locks */ - flag=1; - if (my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, - MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) - error=my_errno; - } - } } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); info->lock_type= F_UNLCK; @@ -147,16 +129,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) mysqld does not turn write locks to read locks, so we're never here in mysqld. */ - if (share->w_locks == 1) - { - flag=1; - if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, - MYF(MY_SEEK_NOT_DONE))) - { - error=my_errno; - break; - } - } share->w_locks--; share->r_locks++; info->lock_type=lock_type; @@ -164,18 +136,9 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } if (!share->r_locks && !share->w_locks) { - flag=1; - if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, - info->lock_wait | MY_SEEK_NOT_DONE)) - { - error=my_errno; - break; - } if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) { error=my_errno; - VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE))); - my_errno=error; break; } } @@ -189,13 +152,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) { /* Change READONLY to RW */ if (share->r_locks == 1) { - flag=1; - if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, - MYF(info->lock_wait | MY_SEEK_NOT_DONE))) - { - error=my_errno; - break; - } share->r_locks--; share->w_locks++; info->lock_type=lock_type; @@ -206,21 +162,11 @@ int maria_lock_database(MARIA_HA *info, int lock_type) { if (!share->w_locks) { - flag=1; - if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, - info->lock_wait | MY_SEEK_NOT_DONE)) - { - error=my_errno; - break; - } if (!share->r_locks) { if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) { error=my_errno; - VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, - info->lock_wait | MY_SEEK_NOT_DONE)); - my_errno=error; break; } } @@ -238,11 +184,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } } pthread_mutex_unlock(&share->intern_lock); -#if defined(FULL_LOG) || defined(_lint) - lock_type|=(int) (flag << 8); /* Set bit to set if real lock */ - maria_log_command(MARIA_LOG_LOCK,info,(byte*) &lock_type,sizeof(lock_type), - error); -#endif DBUG_RETURN(error); } /* maria_lock_database */ @@ -381,14 +322,9 @@ int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer) MARIA_SHARE *share=info->s; if (!share->tot_locks) { - if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, - info->lock_wait | MY_SEEK_NOT_DONE)) - DBUG_RETURN(1); if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) { int error=my_errno ? my_errno : -1; - VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, - MYF(MY_SEEK_NOT_DONE))); my_errno=error; DBUG_RETURN(1); } @@ -438,10 +374,6 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) } #endif } - if (!(operation & WRITEINFO_NO_UNLOCK) && - my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, - MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) - DBUG_RETURN(1); my_errno=olderror; } else if (operation) diff --git a/storage/maria/ma_log.c b/storage/maria/ma_log.c deleted file mode 100644 index 7c32c1068cb..00000000000 --- a/storage/maria/ma_log.c +++ /dev/null @@ -1,164 +0,0 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - Logging of MARIA commands and records on logfile for debugging - The log can be examined with help of the marialog command. -*/ - -#include "maria_def.h" -#if defined(MSDOS) || defined(__WIN__) -#include -#ifndef __WIN__ -#include -#endif -#endif -#ifdef VMS -#include -#endif - -#undef GETPID /* For HPUX */ -#ifdef THREAD -#define GETPID() (log_type == 1 ? (long) maria_pid : (long) my_thread_id()); -#else -#define GETPID() maria_pid -#endif - - /* Activate logging if flag is 1 and reset logging if flag is 0 */ - -static int log_type=0; -ulong maria_pid=0; - -int maria_logging(int activate_log) -{ - int error=0; - char buff[FN_REFLEN]; - DBUG_ENTER("maria_logging"); - - log_type=activate_log; - if (activate_log) - { - if (!maria_pid) - maria_pid=(ulong) getpid(); - if (maria_log_file < 0) - { - if ((maria_log_file = my_create(fn_format(buff,maria_log_filename, - "",".log",4), - 0,(O_RDWR | O_BINARY | O_APPEND),MYF(0))) - < 0) - DBUG_RETURN(my_errno); - } - } - else if (maria_log_file >= 0) - { - error=my_close(maria_log_file,MYF(0)) ? my_errno : 0 ; - maria_log_file= -1; - } - DBUG_RETURN(error); -} - - - /* Logging of records and commands on logfile */ - /* All logs starts with command(1) dfile(2) process(4) result(2) */ - -void _ma_log(enum maria_log_commands command, MARIA_HA *info, - const byte *buffert, uint length) -{ - char buff[11]; - int error,old_errno; - ulong pid=(ulong) GETPID(); - old_errno=my_errno; - bzero(buff,sizeof(buff)); - buff[0]=(char) command; - mi_int2store(buff+1,info->dfile); - mi_int4store(buff+3,pid); - mi_int2store(buff+9,length); - - pthread_mutex_lock(&THR_LOCK_maria); - error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); - VOID(my_write(maria_log_file,buffert,length,MYF(0))); - if (!error) - error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - pthread_mutex_unlock(&THR_LOCK_maria); - my_errno=old_errno; -} - - -void _ma_log_command(enum maria_log_commands command, MARIA_HA *info, - const byte *buffert, uint length, int result) -{ - char buff[9]; - int error,old_errno; - ulong pid=(ulong) GETPID(); - - old_errno=my_errno; - buff[0]=(char) command; - mi_int2store(buff+1,info->dfile); - mi_int4store(buff+3,pid); - mi_int2store(buff+7,result); - pthread_mutex_lock(&THR_LOCK_maria); - error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); - if (buffert) - VOID(my_write(maria_log_file,buffert,length,MYF(0))); - if (!error) - error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - pthread_mutex_unlock(&THR_LOCK_maria); - my_errno=old_errno; -} - - -void _ma_log_record(enum maria_log_commands command, MARIA_HA *info, - const byte *record, my_off_t filepos, int result) -{ - char buff[21],*pos; - int error,old_errno; - uint length; - ulong pid=(ulong) GETPID(); - - old_errno=my_errno; - if (!info->s->base.blobs) - length=info->s->base.reclength; - else - length=info->s->base.reclength+ _ma_calc_total_blob_length(info,record); - buff[0]=(char) command; - mi_int2store(buff+1,info->dfile); - mi_int4store(buff+3,pid); - mi_int2store(buff+7,result); - mi_sizestore(buff+9,filepos); - mi_int4store(buff+17,length); - pthread_mutex_lock(&THR_LOCK_maria); - error=my_lock(maria_log_file,F_WRLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - VOID(my_write(maria_log_file,buff,sizeof(buff),MYF(0))); - VOID(my_write(maria_log_file,(byte*) record,info->s->base.reclength,MYF(0))); - if (info->s->base.blobs) - { - MARIA_BLOB *blob,*end; - - for (end=info->blobs+info->s->base.blobs, blob= info->blobs; - blob != end ; - blob++) - { - memcpy_fixed(&pos,record+blob->offset+blob->pack_length,sizeof(char*)); - VOID(my_write(maria_log_file,pos,blob->length,MYF(0))); - } - } - if (!error) - error=my_lock(maria_log_file,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)); - pthread_mutex_unlock(&THR_LOCK_maria); - my_errno=old_errno; -} diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index e1f1088c6d1..c695cbb1a44 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -75,7 +75,7 @@ MARIA_HA *_ma_test_if_reopen(char *filename) MARIA_HA *maria_open(const char *name, int mode, uint open_flags) { - int lock_error,kfile,open_mode,save_errno,have_rtree=0; + int kfile,open_mode,save_errno,have_rtree=0; uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys, key_parts,unique_key_parts,fulltext_keys,uniques; char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], @@ -90,7 +90,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) LINT_INIT(m_info); kfile= -1; - lock_error=1; errpos=0; head_length=sizeof(share_buff.state.header); bzero((byte*) &info,sizeof(info)); @@ -176,14 +175,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) errpos=2; VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0))); - if (!(open_flags & HA_OPEN_TMP_TABLE)) - { - if ((lock_error=my_lock(kfile,F_RDLCK,0L,F_TO_EOF, - MYF(open_flags & HA_OPEN_WAIT_IF_LOCKED ? - 0 : MY_DONT_WAIT))) && - !(open_flags & HA_OPEN_IGNORE_IF_LOCKED)) - goto err; - } errpos=3; if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP))) { @@ -451,12 +442,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } share->rec[i].type=(int) FIELD_LAST; /* End marker */ - if (! lock_error) - { - VOID(my_lock(kfile,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE))); - lock_error=1; /* Database unlocked */ - } - if (_ma_open_datafile(&info, share, -1)) goto err; errpos=5; @@ -613,11 +598,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) maria_open_list=list_add(maria_open_list,&m_info->open_list); pthread_mutex_unlock(&THR_LOCK_maria); - if (maria_log_file >= 0) - { - intern_filename(name_buff,share->index_file_name); - _ma_log(MARIA_LOG_OPEN,m_info,name_buff,(uint) strlen(name_buff)); - } DBUG_RETURN(m_info); err: @@ -639,8 +619,6 @@ err: my_free((gptr) share,MYF(0)); /* fall through */ case 3: - if (! lock_error) - VOID(my_lock(kfile, F_UNLCK, 0L, F_TO_EOF, MYF(MY_SEEK_NOT_DONE))); /* fall through */ case 2: my_afree((gptr) disk_cache); diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index 7e972ecd850..511c5507aaf 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -28,8 +28,6 @@ uchar NEAR maria_file_magic[]= { (uchar) 254, (uchar) 254,'\007', '\001', }; uchar NEAR maria_pack_file_magic[]= { (uchar) 254, (uchar) 254,'\010', '\002', }; -my_string maria_log_filename=(char*) "maria.log"; -File maria_log_file= -1; uint maria_quick_table_bits=9; ulong maria_block_size= MARIA_KEY_BLOCK_LENGTH; my_bool maria_flush=0, maria_delay_key_write=0, maria_single_user=0; diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c index c9614de1c72..0aef24f40a9 100644 --- a/storage/maria/ma_statrec.c +++ b/storage/maria/ma_statrec.c @@ -241,9 +241,6 @@ int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, if ((! cache_read || share->base.reclength > cache_length) && share->tot_locks == 0) { /* record not in cache */ - if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, - MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) - DBUG_RETURN(my_errno); locked=1; } #else diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 06cfaf7cc5e..7dd1ffeb7fd 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -44,7 +44,7 @@ static void copy_key(struct st_maria_info *info,uint inx, static int verbose=0,testflag=0, first_key=0,async_io=0,key_cacheing=0,write_cacheing=0,locking=0, - rec_pointer_size=0,pack_fields=1,use_log=0,silent=0, + rec_pointer_size=0,pack_fields=1,silent=0, opt_quick_mode=0; static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1, create_flag=0; @@ -209,8 +209,6 @@ int main(int argc, char *argv[]) 0,(MARIA_UNIQUEDEF*) 0, &create_info,create_flag)) goto err; - if (use_log) - maria_logging(1); if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err; if (!silent) @@ -894,9 +892,6 @@ static void get_options(int argc, char **argv) if (*++pos) srand(atoi(pos)); break; - case 'l': - use_log=1; - break; case 'L': locking=1; break; diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c index bfb2c93a95f..d2fe8d90cf6 100644 --- a/storage/maria/ma_test3.c +++ b/storage/maria/ma_test3.c @@ -41,7 +41,7 @@ const char *filename= "test3"; -uint tests=10,forks=10,key_cacheing=0,use_log=0; +uint tests=10,forks=10,key_cacheing=0; static void get_options(int argc, char *argv[]); void start_test(int id); @@ -127,9 +127,6 @@ static void get_options(int argc, char **argv) while (--argc >0 && *(pos = *(++argv)) == '-' ) { switch(*++pos) { - case 'l': - use_log=1; - break; case 'f': forks=atoi(++pos); break; @@ -140,7 +137,7 @@ static void get_options(int argc, char **argv) key_cacheing=1; break; case 'A': /* All flags */ - use_log=key_cacheing=1; + key_cacheing=1; break; case '?': case 'I': @@ -169,8 +166,6 @@ void start_test(int id) MARIA_INFO isam_info; MARIA_HA *file,*file1,*file2=0,*lock; - if (use_log) - maria_logging(1); if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) || !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) { @@ -214,8 +209,6 @@ void start_test(int id) maria_close(file1); maria_close(file2); - if (use_log) - maria_logging(0); if (error) { printf("%2d: Aborted\n",id); fflush(stdout); diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index d3c71cef9b4..3f1ba4b1fac 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -170,7 +170,6 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | HA_STATE_AKTIV | key_changed); - maria_log_record(MARIA_LOG_UPDATE,info,newrec,info->lastpos,0); VOID(_ma_writeinfo(info,key_changed ? WRITEINFO_UPDATE_KEYFILE : 0)); allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) @@ -220,7 +219,6 @@ err: key_changed); err_end: - maria_log_record(MARIA_LOG_UPDATE,info,newrec,info->lastpos,my_errno); VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ if (save_errno == HA_ERR_KEY_NOT_FOUND) diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 313a362f6d6..5a1a540b88b 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -62,11 +62,6 @@ int maria_write(MARIA_HA *info, byte *record) if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); dont_break(); /* Dont allow SIGHUP or SIGINT */ -#if !defined(NO_LOCKING) && defined(USE_RECORD_LOCK) - if (!info->locked && my_lock(info->dfile,F_WRLCK,0L,F_TO_EOF, - MYF(MY_SEEK_NOT_DONE) | info->lock_wait)) - goto err; -#endif filepos= ((share->state.dellink != HA_OFFSET_ERROR && !info->append_insert_at_end) ? share->state.dellink : @@ -155,7 +150,6 @@ int maria_write(MARIA_HA *info, byte *record) HA_STATE_ROW_CHANGED); info->state->records++; info->lastpos=filepos; - maria_log_record(MARIA_LOG_WRITE,info,record,filepos,0); VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); if (info->invalidator != 0) { @@ -220,7 +214,6 @@ err: my_errno=save_errno; err2: save_errno=my_errno; - maria_log_record(MARIA_LOG_WRITE,info,record,filepos,my_errno); VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index de76344a800..7f48a0805da 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -1158,7 +1158,6 @@ static int mariachk(HA_CHECK *param, my_string filename) (state_updated ? UPDATE_STAT : 0) | ((param->testflag & T_SORT_RECORDS) ? UPDATE_SORT : 0))); - VOID(maria_lock_file(param, share->kfile,0L,F_UNLCK,"indexfile",filename)); info->update&= ~HA_STATE_CHANGED; } maria_lock_database(info, F_UNLCK); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index e3f0219a663..9bab65126c7 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -427,8 +427,6 @@ extern LIST *maria_open_list; extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; extern uint maria_quick_table_bits; -extern File maria_log_file; -extern ulong maria_pid; /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ @@ -648,16 +646,6 @@ typedef struct st_maria_block_info #define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) #define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) -enum maria_log_commands -{ - MARIA_LOG_OPEN, MARIA_LOG_WRITE, MARIA_LOG_UPDATE, MARIA_LOG_DELETE, - MARIA_LOG_CLOSE, MARIA_LOG_EXTRA, MARIA_LOG_LOCK, MARIA_LOG_DELETE_ALL -}; - -#define maria_log(a,b,c,d) if (maria_log_file >= 0) _ma_log(a,b,c,d) -#define maria_log_command(a,b,c,d,e) if (maria_log_file >= 0) _ma_log_command(a,b,c,d,e) -#define maria_log_record(a,b,c,d,e) if (maria_log_file >= 0) _ma_log_record(a,b,c,d,e) - #define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) #define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1) @@ -666,14 +654,6 @@ extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); extern uint _ma_pack_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, my_off_t); extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); -extern void _ma_log(enum maria_log_commands command, MARIA_HA *info, - const byte *buffert, uint length); -extern void _ma_log_command(enum maria_log_commands command, - MARIA_HA *info, const byte *buffert, - uint length, int result); -extern void _ma_log_record(enum maria_log_commands command, MARIA_HA *info, - const byte *record, my_off_t filepos, - int result); extern void _ma_report_error(int errcode, const char *file_name); extern my_bool _ma_memmap_file(MARIA_HA *info); extern void _ma_unmap_file(MARIA_HA *info); diff --git a/storage/maria/maria_log.c b/storage/maria/maria_log.c deleted file mode 100644 index 72a4d7e89d5..00000000000 --- a/storage/maria/maria_log.c +++ /dev/null @@ -1,848 +0,0 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* write whats in isam.log */ - -#ifndef USE_MY_FUNC -#define USE_MY_FUNC -#endif - -#include "maria_def.h" -#include -#include -#ifdef HAVE_GETRUSAGE -#include -#endif - -#define FILENAME(A) (A ? A->show_name : "Unknown") - -struct file_info { - long process; - int filenr,id; - uint rnd; - my_string name,show_name,record; - MARIA_HA *isam; - bool closed,used; - ulong accessed; -}; - -struct test_if_open_param { - my_string name; - int max_id; -}; - -struct st_access_param -{ - ulong min_accessed; - struct file_info *found; -}; - -#define NO_FILEPOS (ulong) ~0L - -extern int main(int argc,char * *argv); -static void get_options(int *argc,char ***argv); -static int examine_log(my_string file_name,char **table_names); -static int read_string(IO_CACHE *file,gptr *to,uint length); -static int file_info_compare(void *cmp_arg, void *a,void *b); -static int test_if_open(struct file_info *key,element_count count, - struct test_if_open_param *param); -static void fix_blob_pointers(MARIA_HA *isam,byte *record); -static int test_when_accessed(struct file_info *key,element_count count, - struct st_access_param *access_param); -static void file_info_free(struct file_info *info); -static int close_some_file(TREE *tree); -static int reopen_closed_file(TREE *tree,struct file_info *file_info); -static int find_record_with_key(struct file_info *file_info,byte *record); -static void printf_log(const char *str,...); -static bool cmp_filename(struct file_info *file_info,my_string name); - -static uint verbose=0,update=0,test_info=0,max_files=0,re_open_count=0, - recover=0,prefix_remove=0,opt_processes=0; -static my_string log_filename=0,filepath=0,write_filename=0,record_pos_file=0; -static ulong com_count[10][3],number_of_commands=(ulong) ~0L, - isamlog_process; -static my_off_t isamlog_filepos,start_offset=0,record_pos= HA_OFFSET_ERROR; -static const char *command_name[]= -{"open","write","update","delete","close","extra","lock","re-open", - "delete-all", NullS}; - - -int main(int argc, char **argv) -{ - int error,i,first; - ulong total_count,total_error,total_recover; - MY_INIT(argv[0]); - - log_filename=maria_log_filename; - get_options(&argc,&argv); - maria_init(); - - /* Number of MARIA files we can have open at one time */ - max_files= (my_set_max_open_files(min(max_files,8))-6)/2; - if (update) - printf("Trying to %s MARIA files according to log '%s'\n", - (recover ? "recover" : "update"),log_filename); - error= examine_log(log_filename,argv); - if (update && ! error) - puts("Tables updated successfully"); - total_count=total_error=total_recover=0; - for (i=first=0 ; command_name[i] ; i++) - { - if (com_count[i][0]) - { - if (!first++) - { - if (verbose || update) - puts(""); - puts("Commands Used count Errors Recover errors"); - } - printf("%-12s%9ld%10ld%17ld\n",command_name[i],com_count[i][0], - com_count[i][1],com_count[i][2]); - total_count+=com_count[i][0]; - total_error+=com_count[i][1]; - total_recover+=com_count[i][2]; - } - } - if (total_count) - printf("%-12s%9ld%10ld%17ld\n","Total",total_count,total_error, - total_recover); - if (re_open_count) - printf("Had to do %d re-open because of too few possibly open files\n", - re_open_count); - VOID(maria_panic(HA_PANIC_CLOSE)); - my_free_open_file_info(); - maria_end(); - my_end(test_info ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); - exit(error); - return 0; /* No compiler warning */ -} /* main */ - - -static void get_options(register int *argc, register char ***argv) -{ - int help,version; - const char *pos,*usage; - char option; - - help=0; - usage="Usage: %s [-?iruvDIV] [-c #] [-f #] [-F filepath/] [-o #] [-R file recordpos] [-w write_file] [log-filename [table ...]] \n"; - pos=""; - - while (--*argc > 0 && *(pos = *(++*argv)) == '-' ) { - while (*++pos) - { - version=0; - switch((option=*pos)) { - case '#': - DBUG_PUSH (++pos); - pos=" "; /* Skip rest of arg */ - break; - case 'c': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - number_of_commands=(ulong) atol(pos); - pos=" "; - break; - case 'u': - update=1; - break; - case 'f': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - max_files=(uint) atoi(pos); - pos=" "; - break; - case 'i': - test_info=1; - break; - case 'o': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - start_offset=(my_off_t) strtoll(pos,NULL,10); - pos=" "; - break; - case 'p': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - prefix_remove=atoi(pos); - break; - case 'r': - update=1; - recover++; - break; - case 'P': - opt_processes=1; - break; - case 'R': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - record_pos_file=(char*) pos; - if (!--*argc) - goto err; - record_pos=(my_off_t) strtoll(*(++*argv),NULL,10); - pos=" "; - break; - case 'v': - verbose++; - break; - case 'w': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - write_filename=(char*) pos; - pos=" "; - break; - case 'F': - if (! *++pos) - { - if (!--*argc) - goto err; - else - pos= *(++*argv); - } - filepath= (char*) pos; - pos=" "; - break; - case 'V': - version=1; - /* Fall through */ - case 'I': - case '?': -#include - printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE, - MACHINE_TYPE); - puts("By Monty, for your professional use\n"); - if (version) - break; - puts("Write info about whats in a MARIA log file."); - printf("If no file name is given %s is used\n",log_filename); - puts(""); - printf(usage,my_progname); - puts(""); - puts("Options: -? or -I \"Info\" -V \"version\" -c \"do only # commands\""); - puts(" -f \"max open files\" -F \"filepath\" -i \"extra info\""); - puts(" -o \"offset\" -p # \"remove # components from path\""); - puts(" -r \"recover\" -R \"file recordposition\""); - puts(" -u \"update\" -v \"verbose\" -w \"write file\""); - puts(" -D \"maria compiled with DBUG\" -P \"processes\""); - puts("\nOne can give a second and a third '-v' for more verbose."); - puts("Normaly one does a update (-u)."); - puts("If a recover is done all writes and all possibly updates and deletes is done\nand errors are only counted."); - puts("If one gives table names as arguments only these tables will be updated\n"); - help=1; -#include - break; - default: - printf("illegal option: \"-%c\"\n",*pos); - break; - } - } - } - if (! *argc) - { - if (help) - exit(0); - (*argv)++; - } - if (*argc >= 1) - { - log_filename=(char*) pos; - (*argc)--; - (*argv)++; - } - return; - err: - VOID(fprintf(stderr,"option \"%c\" used without or with wrong argument\n", - option)); - exit(1); -} - - -static int examine_log(my_string file_name, char **table_names) -{ - uint command,result,files_open; - ulong access_time,length; - my_off_t filepos; - int lock_command,maria_result; - char isam_file_name[FN_REFLEN],llbuff[21],llbuff2[21]; - uchar head[20]; - gptr buff; - struct test_if_open_param open_param; - IO_CACHE cache; - File file; - FILE *write_file; - enum ha_extra_function extra_command; - TREE tree; - struct file_info file_info,*curr_file_info; - DBUG_ENTER("examine_log"); - - if ((file=my_open(file_name,O_RDONLY,MYF(MY_WME))) < 0) - DBUG_RETURN(1); - write_file=0; - if (write_filename) - { - if (!(write_file=my_fopen(write_filename,O_WRONLY,MYF(MY_WME)))) - { - my_close(file,MYF(0)); - DBUG_RETURN(1); - } - } - - init_io_cache(&cache,file,0,READ_CACHE,start_offset,0,MYF(0)); - bzero((gptr) com_count,sizeof(com_count)); - init_tree(&tree,0,0,sizeof(file_info),(qsort_cmp2) file_info_compare,1, - (tree_element_free) file_info_free, NULL); - VOID(init_key_cache(maria_key_cache,KEY_CACHE_BLOCK_SIZE,KEY_CACHE_SIZE, - 0, 0)); - - files_open=0; access_time=0; - while (access_time++ != number_of_commands && - !my_b_read(&cache,(byte*) head,9)) - { - isamlog_filepos=my_b_tell(&cache)-9L; - file_info.filenr= mi_uint2korr(head+1); - isamlog_process=file_info.process=(long) mi_uint4korr(head+3); - if (!opt_processes) - file_info.process=0; - result= mi_uint2korr(head+7); - if ((curr_file_info=(struct file_info*) tree_search(&tree, &file_info, - tree.custom_arg))) - { - curr_file_info->accessed=access_time; - if (update && curr_file_info->used && curr_file_info->closed) - { - if (reopen_closed_file(&tree,curr_file_info)) - { - command=sizeof(com_count)/sizeof(com_count[0][0])/3; - result=0; - goto com_err; - } - } - } - command=(uint) head[0]; - if (command < sizeof(com_count)/sizeof(com_count[0][0])/3 && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - { - com_count[command][0]++; - if (result) - com_count[command][1]++; - } - switch ((enum maria_log_commands) command) { - case MARIA_LOG_OPEN: - if (!table_names[0]) - { - com_count[command][0]--; /* Must be counted explicite */ - if (result) - com_count[command][1]--; - } - - if (curr_file_info) - printf("\nWarning: %s is opened with same process and filenumber\nMaybe you should use the -P option ?\n", - curr_file_info->show_name); - if (my_b_read(&cache,(byte*) head,2)) - goto err; - file_info.name=0; - file_info.show_name=0; - file_info.record=0; - if (read_string(&cache,(gptr*) &file_info.name, - (uint) mi_uint2korr(head))) - goto err; - { - uint i; - char *pos,*to; - - /* Fix if old DOS files to new format */ - for (pos=file_info.name; (pos=strchr(pos,'\\')) ; pos++) - *pos= '/'; - - pos=file_info.name; - for (i=0 ; i < prefix_remove ; i++) - { - char *next; - if (!(next=strchr(pos,'/'))) - break; - pos=next+1; - } - to=isam_file_name; - if (filepath) - to=convert_dirname(isam_file_name,filepath,NullS); - strmov(to,pos); - fn_ext(isam_file_name)[0]=0; /* Remove extension */ - } - open_param.name=file_info.name; - open_param.max_id=0; - VOID(tree_walk(&tree,(tree_walk_action) test_if_open,(void*) &open_param, - left_root_right)); - file_info.id=open_param.max_id+1; - /* - * In the line below +10 is added to accomodate '<' and '>' chars - * plus '\0' at the end, so that there is place for 7 digits. - * It is improbable that same table can have that many entries in - * the table cache. - * The additional space is needed for the sprintf commands two lines - * below. - */ - file_info.show_name=my_memdup(isam_file_name, - (uint) strlen(isam_file_name)+10, - MYF(MY_WME)); - if (file_info.id > 1) - sprintf(strend(file_info.show_name),"<%d>",file_info.id); - file_info.closed=1; - file_info.accessed=access_time; - file_info.used=1; - if (table_names[0]) - { - char **name; - file_info.used=0; - for (name=table_names ; *name ; name++) - { - if (!strcmp(*name,isam_file_name)) - file_info.used=1; /* Update/log only this */ - } - } - if (update && file_info.used) - { - if (files_open >= max_files) - { - if (close_some_file(&tree)) - goto com_err; - files_open--; - } - if (!(file_info.isam= maria_open(isam_file_name,O_RDWR, - HA_OPEN_WAIT_IF_LOCKED))) - goto com_err; - if (!(file_info.record=my_malloc(file_info.isam->s->base.reclength, - MYF(MY_WME)))) - goto end; - files_open++; - file_info.closed=0; - } - VOID(tree_insert(&tree, (gptr) &file_info, 0, tree.custom_arg)); - if (file_info.used) - { - if (verbose && !record_pos_file) - printf_log("%s: open -> %d",file_info.show_name, file_info.filenr); - com_count[command][0]++; - if (result) - com_count[command][1]++; - } - break; - case MARIA_LOG_CLOSE: - if (verbose && !record_pos_file && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - printf_log("%s: %s -> %d",FILENAME(curr_file_info), - command_name[command],result); - if (curr_file_info) - { - if (!curr_file_info->closed) - files_open--; - VOID(tree_delete(&tree, (gptr) curr_file_info, tree.custom_arg)); - } - break; - case MARIA_LOG_EXTRA: - if (my_b_read(&cache,(byte*) head,1)) - goto err; - extra_command=(enum ha_extra_function) head[0]; - if (verbose && !record_pos_file && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - printf_log("%s: %s(%d) -> %d",FILENAME(curr_file_info), - command_name[command], (int) extra_command,result); - if (update && curr_file_info && !curr_file_info->closed) - { - if (maria_extra(curr_file_info->isam, extra_command, 0) != (int) result) - { - fflush(stdout); - VOID(fprintf(stderr, - "Warning: error %d, expected %d on command %s at %s\n", - my_errno,result,command_name[command], - llstr(isamlog_filepos,llbuff))); - fflush(stderr); - } - } - break; - case MARIA_LOG_DELETE: - if (my_b_read(&cache,(byte*) head,8)) - goto err; - filepos=mi_sizekorr(head); - if (verbose && (!record_pos_file || - ((record_pos == filepos || record_pos == NO_FILEPOS) && - !cmp_filename(curr_file_info,record_pos_file))) && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - printf_log("%s: %s at %ld -> %d",FILENAME(curr_file_info), - command_name[command],(long) filepos,result); - if (update && curr_file_info && !curr_file_info->closed) - { - if (maria_rrnd(curr_file_info->isam,curr_file_info->record,filepos)) - { - if (!recover) - goto com_err; - if (verbose) - printf_log("error: Didn't find row to delete with maria_rrnd"); - com_count[command][2]++; /* Mark error */ - } - maria_result=maria_delete(curr_file_info->isam,curr_file_info->record); - if ((maria_result == 0 && result) || - (maria_result && (uint) my_errno != result)) - { - if (!recover) - goto com_err; - if (maria_result) - com_count[command][2]++; /* Mark error */ - if (verbose) - printf_log("error: Got result %d from maria_delete instead of %d", - maria_result, result); - } - } - break; - case MARIA_LOG_WRITE: - case MARIA_LOG_UPDATE: - if (my_b_read(&cache,(byte*) head,12)) - goto err; - filepos=mi_sizekorr(head); - length=mi_uint4korr(head+8); - buff=0; - if (read_string(&cache,&buff,(uint) length)) - goto err; - if ((!record_pos_file || - ((record_pos == filepos || record_pos == NO_FILEPOS) && - !cmp_filename(curr_file_info,record_pos_file))) && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - { - if (write_file && - (my_fwrite(write_file,buff,length,MYF(MY_WAIT_IF_FULL | MY_NABP)))) - goto end; - if (verbose) - printf_log("%s: %s at %ld, length=%ld -> %d", - FILENAME(curr_file_info), - command_name[command], filepos,length,result); - } - if (update && curr_file_info && !curr_file_info->closed) - { - if (curr_file_info->isam->s->base.blobs) - fix_blob_pointers(curr_file_info->isam,buff); - if ((enum maria_log_commands) command == MARIA_LOG_UPDATE) - { - if (maria_rrnd(curr_file_info->isam,curr_file_info->record,filepos)) - { - if (!recover) - { - result=0; - goto com_err; - } - if (verbose) - printf_log("error: Didn't find row to update with maria_rrnd"); - if (recover == 1 || result || - find_record_with_key(curr_file_info,buff)) - { - com_count[command][2]++; /* Mark error */ - break; - } - } - maria_result=maria_update(curr_file_info->isam,curr_file_info->record, - buff); - if ((maria_result == 0 && result) || - (maria_result && (uint) my_errno != result)) - { - if (!recover) - goto com_err; - if (verbose) - printf_log("error: Got result %d from maria_update instead of %d", - maria_result, result); - if (maria_result) - com_count[command][2]++; /* Mark error */ - } - } - else - { - maria_result=maria_write(curr_file_info->isam,buff); - if ((maria_result == 0 && result) || - (maria_result && (uint) my_errno != result)) - { - if (!recover) - goto com_err; - if (verbose) - printf_log("error: Got result %d from maria_write instead of %d", - maria_result, result); - if (maria_result) - com_count[command][2]++; /* Mark error */ - } - if (!recover && filepos != curr_file_info->isam->lastpos) - { - printf("error: Wrote at position: %s, should have been %s", - llstr(curr_file_info->isam->lastpos,llbuff), - llstr(filepos,llbuff2)); - goto end; - } - } - } - my_free(buff,MYF(0)); - break; - case MARIA_LOG_LOCK: - if (my_b_read(&cache,(byte*) head,sizeof(lock_command))) - goto err; - memcpy_fixed(&lock_command,head,sizeof(lock_command)); - if (verbose && !record_pos_file && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - printf_log("%s: %s(%d) -> %d\n",FILENAME(curr_file_info), - command_name[command],lock_command,result); - if (update && curr_file_info && !curr_file_info->closed) - { - if (maria_lock_database(curr_file_info->isam,lock_command) != - (int) result) - goto com_err; - } - break; - case MARIA_LOG_DELETE_ALL: - if (verbose && !record_pos_file && - (!table_names[0] || (curr_file_info && curr_file_info->used))) - printf_log("%s: %s -> %d\n",FILENAME(curr_file_info), - command_name[command],result); - break; - default: - fflush(stdout); - VOID(fprintf(stderr, - "Error: found unknown command %d in logfile, aborted\n", - command)); - fflush(stderr); - goto end; - } - } - end_key_cache(maria_key_cache,1); - delete_tree(&tree); - VOID(end_io_cache(&cache)); - VOID(my_close(file,MYF(0))); - if (write_file && my_fclose(write_file,MYF(MY_WME))) - DBUG_RETURN(1); - DBUG_RETURN(0); - - err: - fflush(stdout); - VOID(fprintf(stderr,"Got error %d when reading from logfile\n",my_errno)); - fflush(stderr); - goto end; - com_err: - fflush(stdout); - VOID(fprintf(stderr,"Got error %d, expected %d on command %s at %s\n", - my_errno,result,command_name[command], - llstr(isamlog_filepos,llbuff))); - fflush(stderr); - end: - end_key_cache(maria_key_cache, 1); - delete_tree(&tree); - VOID(end_io_cache(&cache)); - VOID(my_close(file,MYF(0))); - if (write_file) - VOID(my_fclose(write_file,MYF(MY_WME))); - DBUG_RETURN(1); -} - - -static int read_string(IO_CACHE *file, register gptr *to, register uint length) -{ - DBUG_ENTER("read_string"); - - if (*to) - my_free((gptr) *to,MYF(0)); - if (!(*to= (gptr) my_malloc(length+1,MYF(MY_WME))) || - my_b_read(file,(byte*) *to,length)) - { - if (*to) - my_free(*to,MYF(0)); - *to= 0; - DBUG_RETURN(1); - } - *((char*) *to+length)= '\0'; - DBUG_RETURN (0); -} /* read_string */ - - -static int file_info_compare(void* cmp_arg __attribute__((unused)), - void *a, void *b) -{ - long lint; - - if ((lint=((struct file_info*) a)->process - - ((struct file_info*) b)->process)) - return lint < 0L ? -1 : 1; - return ((struct file_info*) a)->filenr - ((struct file_info*) b)->filenr; -} - - /* ARGSUSED */ - -static int test_if_open (struct file_info *key, - element_count count __attribute__((unused)), - struct test_if_open_param *param) -{ - if (!strcmp(key->name,param->name) && key->id > param->max_id) - param->max_id=key->id; - return 0; -} - - -static void fix_blob_pointers(MARIA_HA *info, byte *record) -{ - byte *pos; - MARIA_BLOB *blob,*end; - - pos=record+info->s->base.reclength; - for (end=info->blobs+info->s->base.blobs, blob= info->blobs; - blob != end ; - blob++) - { - memcpy_fixed(record+blob->offset+blob->pack_length,&pos,sizeof(char*)); - pos+= _ma_calc_blob_length(blob->pack_length,record+blob->offset); - } -} - - /* close the file with hasn't been accessed for the longest time */ - /* ARGSUSED */ - -static int test_when_accessed (struct file_info *key, - element_count count __attribute__((unused)), - struct st_access_param *access_param) -{ - if (key->accessed < access_param->min_accessed && ! key->closed) - { - access_param->min_accessed=key->accessed; - access_param->found=key; - } - return 0; -} - - -static void file_info_free(struct file_info *fileinfo) -{ - DBUG_ENTER("file_info_free"); - if (update) - { - if (!fileinfo->closed) - VOID(maria_close(fileinfo->isam)); - if (fileinfo->record) - my_free(fileinfo->record,MYF(0)); - } - my_free(fileinfo->name,MYF(0)); - my_free(fileinfo->show_name,MYF(0)); - DBUG_VOID_RETURN; -} - - - -static int close_some_file(TREE *tree) -{ - struct st_access_param access_param; - - access_param.min_accessed=LONG_MAX; - access_param.found=0; - - VOID(tree_walk(tree,(tree_walk_action) test_when_accessed, - (void*) &access_param,left_root_right)); - if (!access_param.found) - return 1; /* No open file that is possibly to close */ - if (maria_close(access_param.found->isam)) - return 1; - access_param.found->closed=1; - return 0; -} - - -static int reopen_closed_file(TREE *tree, struct file_info *fileinfo) -{ - char name[FN_REFLEN]; - if (close_some_file(tree)) - return 1; /* No file to close */ - strmov(name,fileinfo->show_name); - if (fileinfo->id > 1) - *strrchr(name,'<')='\0'; /* Remove "" */ - - if (!(fileinfo->isam= maria_open(name,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) - return 1; - fileinfo->closed=0; - re_open_count++; - return 0; -} - - /* Try to find record with uniq key */ - -static int find_record_with_key(struct file_info *file_info, byte *record) -{ - uint key; - MARIA_HA *info=file_info->isam; - uchar tmp_key[HA_MAX_KEY_BUFF]; - - for (key=0 ; key < info->s->base.keys ; key++) - { - if (maria_is_key_active(info->s->state.key_map, key) && - info->s->keyinfo[key].flag & HA_NOSAME) - { - VOID(_ma_make_key(info,key,tmp_key,record,0L)); - return maria_rkey(info,file_info->record,(int) key,(char*) tmp_key,0, - HA_READ_KEY_EXACT); - } - } - return 1; -} - - -static void printf_log(const char *format,...) -{ - char llbuff[21]; - va_list args; - va_start(args,format); - if (verbose > 2) - printf("%9s:",llstr(isamlog_filepos,llbuff)); - if (verbose > 1) - printf("%5ld ",isamlog_process); /* Write process number */ - (void) vprintf((char*) format,args); - putchar('\n'); - va_end(args); -} - - -static bool cmp_filename(struct file_info *file_info, my_string name) -{ - if (!file_info) - return 1; - return strcmp(file_info->name,name) ? 1 : 0; -} diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 2a83bbb0f3f..514d8ba0bf5 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -1107,16 +1107,16 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) my_off_t total_count; char llbuf[32]; - DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1)); + DBUG_PRINT("info", ("column: %3lu", count - huff_counts + 1)); if (verbose >= 2) - VOID(printf("column: %3u\n", count - huff_counts + 1)); + VOID(printf("column: %3lu\n", count - huff_counts + 1)); if (count->tree_buff) { - DBUG_PRINT("info", ("number of distinct values: %u", + DBUG_PRINT("info", ("number of distinct values: %lu", (count->tree_pos - count->tree_buff) / count->field_length)); if (verbose >= 2) - VOID(printf("number of distinct values: %u\n", + VOID(printf("number of distinct values: %lu\n", (count->tree_pos - count->tree_buff) / count->field_length)); } @@ -2281,7 +2281,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) if (bits > 8 * sizeof(code)) { VOID(fflush(stdout)); - VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + VOID(fprintf(stderr, "error: Huffman code too long: %u/%lu\n", bits, 8 * sizeof(code))); errors++; break; From c262b880a5f74bc938d8925f4dda8ada57bc272b Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 24 May 2006 16:44:26 +0300 Subject: [PATCH 09/95] postmerge fix BUILD/SETUP.sh: postmerge changes configure.in: postmerge changes mysql-test/r/ps_maria.result: changed result according last MyISAM test changes --- BUILD/SETUP.sh | 1 - configure.in | 9 +++++---- mysql-test/r/ps_maria.result | 4 ++-- sql/Makefile.am | 4 ++-- sql/ha_maria.cc | 21 +++++++++++++++++++-- 5 files changed, 28 insertions(+), 11 deletions(-) diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index 5efa42e4619..754883bb20b 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -141,7 +141,6 @@ base_configs="--prefix=$prefix --enable-assembler " base_configs="$base_configs --with-extra-charsets=complex " base_configs="$base_configs --enable-thread-safe-client --with-readline " base_configs="$base_configs --with-big-tables" -base_configs="$base_configs --with-maria-storage-engine" # we need local-infile in all binaries for rpl000001 # if you need to disable local-infile in the client, write a build script diff --git a/configure.in b/configure.in index 90ca6c69587..d4905ff7c44 100644 --- a/configure.in +++ b/configure.in @@ -2252,13 +2252,14 @@ MYSQL_PLUGIN_ACTIONS(ndbcluster,[MYSQL_SETUP_NDBCLUSTER]) MYSQL_STORAGE_ENGINE(partition, partition, [Partition Support], [MySQL Partitioning Support], [max,max-no-ndb]) +MYSQL_STORAGE_ENGINE(maria,maria, [Maria Storage Engine], + [Traditional transactional MySQL tables]) +MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) +MYSQL_PLUGIN_MANDATORY(maria) dnl -- ndbcluster requires partition to be enabled MYSQL_PLUGIN_DEPENDS(ndbcluster, [partition]) -MYSQL_STORAGE_ENGINE(maria,,,,,,storage/maria,, - \$(top_builddir)/storage/maria/libmaria.a, [ - AC_CONFIG_FILES(storage/maria/Makefile) -]) MYSQL_CONFIGURE_PLUGINS([none]) diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index a5cb3050dcb..00e32cf8fce 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1777,7 +1777,7 @@ Table Create Table t5 CREATE TABLE `t5` ( `const01` bigint(1) NOT NULL DEFAULT '0', `param01` bigint(20) DEFAULT NULL, - `const02` decimal(2,1) unsigned NOT NULL DEFAULT '0.0', + `const02` decimal(2,1) NOT NULL DEFAULT '0.0', `param02` decimal(65,30) DEFAULT NULL, `const03` double NOT NULL DEFAULT '0', `param03` double DEFAULT NULL, @@ -1807,7 +1807,7 @@ select * from t5 ; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr def test t5 t5 const01 const01 8 1 1 N 32769 0 63 def test t5 t5 param01 param01 8 20 1 Y 32768 0 63 -def test t5 t5 const02 const02 246 3 3 N 33 1 63 +def test t5 t5 const02 const02 246 4 3 N 1 1 63 def test t5 t5 param02 param02 246 67 32 Y 0 30 63 def test t5 t5 const03 const03 5 17 1 N 32769 31 63 def test t5 t5 param03 param03 5 23 1 Y 32768 31 63 diff --git a/sql/Makefile.am b/sql/Makefile.am index eec7209bf50..1c4a2ec5402 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -52,7 +52,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \ ha_innodb.h ha_berkeley.h ha_federated.h \ ha_ndbcluster.h ha_ndbcluster_binlog.h \ - ha_ndbcluster_tables.h \ + ha_ndbcluster_tables.h ha_maria.h\ opt_range.h protocol.h rpl_tblmap.h \ log.h sql_show.h rpl_rli.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h \ @@ -88,7 +88,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ records.cc filesort.cc handler.cc \ ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ ha_partition.cc ha_innodb.cc ha_berkeley.cc \ - ha_federated.cc \ + ha_federated.cc ha_maria.cc\ ha_ndbcluster.cc ha_ndbcluster_binlog.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc index 46324d0b188..740dfa85a23 100644 --- a/sql/ha_maria.cc +++ b/sql/ha_maria.cc @@ -63,13 +63,16 @@ TYPELIB maria_stats_method_typelib= static handler *maria_create_handler(TABLE_SHARE * table); /* MARIA handlerton */ +static const char maria_hton_name[]= "MARIA"; +static const char maria_hton_comment[]= + "Transactional storage engine, optimized for long running transactions"; handlerton maria_hton= { MYSQL_HANDLERTON_INTERFACE_VERSION, - "MARIA", + maria_hton_name, SHOW_OPTION_YES, - "Transactional storage engine, optimized for long running transactions", + maria_hton_comment, DB_TYPE_MARIA, ha_maria_init, 0, /* slot */ @@ -1834,3 +1837,17 @@ bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, return COMPATIBLE_DATA_NO; return COMPATIBLE_DATA_YES; } + +mysql_declare_plugin(maria) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &maria_hton, + maria_hton_name, + "MySQL AB", + maria_hton_comment, + NULL, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + 0 +} +mysql_declare_plugin_end; From fbe22b6020a5d41e5cc0819192d437f3963fd298 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 6 Jul 2006 11:10:34 +0300 Subject: [PATCH 10/95] The pagae cache added. mysys/Makefile.am: the page cache and test proms for it added include/pagecache.h: New BitKeeper file ``include/pagecache.h'' mysys/mf_pagecache.c: New BitKeeper file ``mysys/mf_pagecache.c'' mysys/test_file.c: New BitKeeper file ``mysys/test_file.c'' mysys/test_file.h: New BitKeeper file ``mysys/test_file.h'' mysys/test_pagecache_consist.c: New BitKeeper file ``mysys/test_pagecache_consist.c'' mysys/test_pagecache_single.c: New BitKeeper file ``mysys/test_pagecache_single.c'' --- include/pagecache.h | 226 ++ mysys/Makefile.am | 58 +- mysys/mf_pagecache.c | 3920 ++++++++++++++++++++++++++++++++ mysys/test_file.c | 70 + mysys/test_file.h | 14 + mysys/test_pagecache_consist.c | 447 ++++ mysys/test_pagecache_single.c | 589 +++++ 7 files changed, 5323 insertions(+), 1 deletion(-) create mode 100644 include/pagecache.h create mode 100755 mysys/mf_pagecache.c create mode 100644 mysys/test_file.c create mode 100644 mysys/test_file.h create mode 100755 mysys/test_pagecache_consist.c create mode 100644 mysys/test_pagecache_single.c diff --git a/include/pagecache.h b/include/pagecache.h new file mode 100644 index 00000000000..018e695fa27 --- /dev/null +++ b/include/pagecache.h @@ -0,0 +1,226 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Page cache variable structures */ + +#ifndef _pagecache_h +#define _pagecache_h +C_MODE_START + +/* Type of the page */ +enum pagecache_page_type +{ +#ifndef DBUG_OFF + /* used only for control page type chenging during debugging */ + PAGECACHE_EMPTY_PAGE, +#endif + /* the page does not contain LSN */ + PAGECACHE_PLAIN_PAGE, + /* the page contain LSN (maria tablespace page) */ + PAGECACHE_LSN_PAGE +}; + +/* + This enum describe lock status changing. every typr of page cache will + interpret WRITE/READ lock as it need. +*/ +enum pagecache_page_lock +{ + PAGECACHE_LOCK_LEFT_UNLOCKED, /* free -> free */ + PAGECACHE_LOCK_LEFT_READLOCKED, /* read -> read */ + PAGECACHE_LOCK_LEFT_WRITELOCKED, /* write -> write */ + PAGECACHE_LOCK_READ, /* free -> read */ + PAGECACHE_LOCK_WRITE, /* free -> write */ + PAGECACHE_LOCK_READ_UNLOCK, /* read -> free */ + PAGECACHE_LOCK_WRITE_UNLOCK, /* write -> free */ + PAGECACHE_LOCK_WRITE_TO_READ /* write -> read */ +}; +/* + This enum describe pin status changing +*/ +enum pagecache_page_pin +{ + PAGECACHE_PIN_LEFT_PINNED, /* pinned -> pinned */ + PAGECACHE_PIN_LEFT_UNPINNED, /* unpinned -> unpinned */ + PAGECACHE_PIN, /* unpinned -> pinned */ + PAGECACHE_UNPIN /* pinned -> unpinned */ +}; +/* How to write the page */ +enum pagecache_write_mode +{ + /* do not write immediately, i.e. it will be dirty page */ + PAGECACHE_WRITE_DELAY, + /* write page to the file and put it to the cache */ + PAGECACHE_WRITE_NOW, + /* page already is in the file. (key cache insert analogue) */ + PAGECACHE_WRITE_DONE +}; + +typedef void *PAGECACHE_PAGE_LINK; + +/* TODO: move to loghandler emulator */ +typedef void LOG_HANDLER; +typedef void *LSN; + +/* file descriptor for Maria */ +typedef struct st_pagecache_file +{ + int file; /* it is for debugging purposes then it will be uint32 file_no */ +} PAGECACHE_FILE; + +/* page number for maria */ +typedef uint32 maria_page_no_t; + +/* declare structures that is used by st_pagecache */ + +struct st_pagecache_block_link; +typedef struct st_pagecache_block_link PAGECACHE_BLOCK_LINK; +struct st_pagecache_page; +typedef struct st_pagecache_page PAGECACHE_PAGE; +struct st_pagecache_hash_link; +typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; + +/* info about requests in a waiting queue */ +typedef struct st_pagecache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting threads */ +} PAGECACHE_WQUEUE; + +#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */ + +/* + The page cache structure + It also contains read-only statistics parameters. +*/ + +typedef struct st_pagecache +{ + my_bool inited; + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ + uint shift; /* block size = 2 ^ shift */ + my_size_t mem_size; /* specified size of the cache memory */ + uint32 block_size; /* size of the page buffer of a cache block */ + ulong min_warm_blocks; /* min number of warm blocks; */ + ulong age_threshold; /* age threshold for hot blocks */ + ulonglong time; /* total number of block link operations */ + uint hash_entries; /* max number of entries in the hash table */ + int hash_links; /* max number of hash links */ + int hash_links_used; /* number of hash links currently used */ + int disk_blocks; /* max number of blocks in the cache */ + ulong blocks_used; /* maximum number of concurrently used blocks */ + ulong blocks_unused; /* number of currently unused blocks */ + ulong blocks_changed; /* number of currently dirty blocks */ + ulong warm_blocks; /* number of blocks in warm sub-chain */ + ulong cnt_for_resize_op; /* counter to block resize operation */ + long blocks_available; /* number of blocks available in the LRU chain */ + PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */ + PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */ + PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */ + PAGECACHE_BLOCK_LINK *free_block_list;/* list of free blocks */ + PAGECACHE_BLOCK_LINK *block_root;/* memory for block links */ + byte HUGE_PTR *block_mem; /* memory for block buffers */ + PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */ + PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */ + pthread_mutex_t cache_lock; /* to lock access to the cache structure */ + PAGECACHE_WQUEUE resize_queue; /* threads waiting during resize operation */ + PAGECACHE_WQUEUE waiting_for_hash_link;/* waiting for a free hash link */ + PAGECACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */ + /* hash for dirty file bl.*/ + PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + /* hash for other file bl.*/ + PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + + LOG_HANDLER *loghandler; /* loghandler structure */ + + /* + The following variables are and variables used to hold parameters for + initializing the key cache. + */ + + ulonglong param_buff_size; /* size the memory allocated for the cache */ + ulong param_block_size; /* size of the blocks in the key cache */ + ulong param_division_limit; /* min. percentage of warm blocks */ + ulong param_age_threshold; /* determines when hot block is downgraded */ + + /* Statistics variables. These are reset in reset_key_cache_counters(). */ + ulong global_blocks_changed; /* number of currently dirty blocks */ + ulonglong global_cache_w_requests;/* number of write requests (write hits) */ + ulonglong global_cache_write; /* number of writes from cache to files */ + ulonglong global_cache_r_requests;/* number of read requests (read hits) */ + ulonglong global_cache_read; /* number of reads from files to cache */ + + int blocks; /* max number of blocks in the cache */ + my_bool in_init; /* Set to 1 in MySQL during init/resize */ +} PAGECACHE; + +extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size, + LOG_HANDLER *loghandler); +extern int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold); +extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold); +extern byte *pagecache_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link); +extern my_bool pagecache_write(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link); +void pagecache_unlock_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page); +void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page); +void pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno); +void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link); +extern int flush_pagecache_blocks(PAGECACHE *keycache, + PAGECACHE_FILE *file, + enum flush_type type); +my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush); +extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); + +C_MODE_END +#endif /* _keycache_h */ diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 1241e8cdded..485887a085e 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -54,7 +54,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ - my_windac.c my_access.c base64.c + my_windac.c my_access.c base64.c mf_pagecache.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ @@ -129,5 +129,61 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c +test_mf_pagecache.o: mf_pagecache.c ../include/pagecache.h $(LIBRARIES) + $(CP) $(srcdir)/mf_pagecache.c test_mf_pagecache.c + $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_mf_pagecache.c + +test_file.o: test_file.c test_file.h + $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_file.c + +test_pagecache_single1k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single8k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=8192 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single64k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single: test_pagecache_single1k$(EXEEXT) test_pagecache_single8k$(EXEEXT) test_pagecache_single64k$(EXEEXT) + ./test_pagecache_single64k$(EXEEXT) + ./test_pagecache_single8k$(EXEEXT) + ./test_pagecache_single1k$(EXEEXT) + +test_pagecache_consist1k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist: test_pagecache_consist1k$(EXEEXT) test_pagecache_consist64k$(EXEEXT) test_pagecache_consist1kHC$(EXEEXT) test_pagecache_consist64kHC$(EXEEXT) test_pagecache_consist1kRD$(EXEEXT) test_pagecache_consist64kRD$(EXEEXT) test_pagecache_consist1kWR$(EXEEXT) test_pagecache_consist64kWR$(EXEEXT) + ./test_pagecache_consist1k$(EXEEXT) + ./test_pagecache_consist64k$(EXEEXT) + ./test_pagecache_consist1kHC$(EXEEXT) + ./test_pagecache_consist64kHC$(EXEEXT) + ./test_pagecache_consist1kRD$(EXEEXT) + ./test_pagecache_consist64kRD$(EXEEXT) + ./test_pagecache_consist1kWR$(EXEEXT) + ./test_pagecache_consist64kWR$(EXEEXT) + + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c new file mode 100755 index 00000000000..4693995f922 --- /dev/null +++ b/mysys/mf_pagecache.c @@ -0,0 +1,3920 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + These functions handle page cacheing for Maria tables. + + One cache can handle many files. + It must contain buffers of the same blocksize. + init_pagecache() should be used to init cache handler. + + The free list (free_block_list) is a stack like structure. + When a block is freed by free_block(), it is pushed onto the stack. + When a new block is required it is first tried to pop one from the stack. + If the stack is empty, it is tried to get a never-used block from the pool. + If this is empty too, then a block is taken from the LRU ring, flushing it + to disk, if neccessary. This is handled in find_key_block(). + With the new free list, the blocks can have three temperatures: + hot, warm and cold (which is free). This is remembered in the block header + by the enum BLOCK_TEMPERATURE temperature variable. Remembering the + temperature is neccessary to correctly count the number of warm blocks, + which is required to decide when blocks are allowed to become hot. Whenever + a block is inserted to another (sub-)chain, we take the old and new + temperature into account to decide if we got one more or less warm block. + blocks_unused is the sum of never used blocks in the pool and of currently + free blocks. blocks_used is the number of blocks fetched from the pool and + as such gives the maximum number of in-use blocks at any time. +*/ + +#include "mysys_priv.h" +#include +#include "my_static.h" +#include +#include +#include + +/* + Some compilation flags have been added specifically for this module + to control the following: + - not to let a thread to yield the control when reading directly + from page cache, which might improve performance in many cases; + to enable this add: + #define SERIALIZED_READ_FROM_CACHE + - to set an upper bound for number of threads simultaneously + using the page cache; this setting helps to determine an optimal + size for hash table and improve performance when the number of + blocks in the page cache much less than the number of threads + accessing it; + to set this number equal to add + #define MAX_THREADS + - to substitute calls of pthread_cond_wait for calls of + pthread_cond_timedwait (wait with timeout set up); + this setting should be used only when you want to trap a deadlock + situation, which theoretically should not happen; + to set timeout equal to seconds add + #define PAGECACHE_TIMEOUT + - to enable the module traps and to send debug information from + page cache module to a special debug log add: + #define PAGECACHE_DEBUG + the name of this debug log file can be set through: + #define PAGECACHE_DEBUG_LOG + if the name is not defined, it's set by default; + if the PAGECACHE_DEBUG flag is not set up and we are in a debug + mode, i.e. when ! defined(DBUG_OFF), the debug information from the + module is sent to the regular debug log. + + Example of the settings: + #define SERIALIZED_READ_FROM_CACHE + #define MAX_THREADS 100 + #define PAGECACHE_TIMEOUT 1 + #define PAGECACHE_DEBUG + #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" +*/ + +#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" + +/* + In key cache we have external raw locking here we use + SERIALIZED_READ_FROM_CACHE to avoid problem of reading + not consistent data from te page +*/ +#define SERIALIZED_READ_FROM_CACHE yes + +#define BLOCK_INFO(B) DBUG_PRINT("info", \ + ("block 0x%lx, file %lu, page %lu, s %0x", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status)) + +/* TODO: put it to my_static.c */ +my_bool my_disable_flush_pagecache_blocks= 0; + +#if defined(MSDOS) && !defined(M_IC80386) +/* we nead much memory */ +#undef my_malloc_lock +#undef my_free_lock +#define my_malloc_lock(A,B) halloc((long) (A/IO_SIZE),IO_SIZE) +#define my_free_lock(A,B) hfree(A) +#endif /* defined(MSDOS) && !defined(M_IC80386) */ + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) + +/* types of condition variables */ +#define COND_FOR_REQUESTED 0 +#define COND_FOR_SAVED 1 +#define COND_FOR_WRLOCK 2 +#define COND_FOR_COPY 3 +#define COND_SIZE 4 + +typedef pthread_cond_t KEYCACHE_CONDVAR; + +/* descriptor of the page in the page cache block buffer */ +struct st_pagecache_page +{ + PAGECACHE_FILE file; /* file to which the page belongs to */ + maria_page_no_t pageno; /* number of the page in the file */ +}; + +/* element in the chain of a hash table bucket */ +struct st_pagecache_hash_link +{ + struct st_pagecache_hash_link + *next, **prev; /* to connect links in the same bucket */ + struct st_pagecache_block_link + *block; /* reference to the block for the page: */ + PAGECACHE_FILE file; /* from such a file */ + maria_page_no_t pageno; /* this page */ + uint requests; /* number of requests for the page */ +}; + +/* simple states of a block */ +#define BLOCK_ERROR 1 /* an error occured when performing disk i/o */ +#define BLOCK_READ 2 /* the is page in the block buffer */ +#define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ +#define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ +#define BLOCK_IN_FLUSH 16 /* block is in flush operation */ +#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ +#define BLOCK_WRLOCK 64 /* write locked block */ +#define BLOCK_CPYWRT 128 /* block buffer is in copy&write (see also cpyrd) */ + +/* page status, returned by find_key_block */ +#define PAGE_READ 0 +#define PAGE_TO_BE_READ 1 +#define PAGE_WAIT_TO_BE_READ 2 + +/* block temperature determines in which (sub-)chain the block currently is */ +enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; + +/* debug info */ +#ifndef DBUG_OFF +static char *page_cache_page_type_str[]= +{ + (char*)"PLAIN", + (char*)"LSN" +}; +static char *page_cache_page_write_mode_str[]= +{ + (char*)"DELAY", + (char*)"NOW", + (char*)"DONE" +}; +static char *page_cache_page_lock_str[]= +{ + (char*)"free -> free ", + (char*)"read -> read ", + (char*)"write -> write", + (char*)"free -> read ", + (char*)"free -> write", + (char*)"read -> free ", + (char*)"write -> free ", + (char*)"write -> read " +}; +static char *page_cache_page_pin_str[]= +{ + (char*)"pinned -> pinned ", + (char*)"unpinned -> unpinned", + (char*)"unpinned -> pinned ", + (char*)"pinned -> unpinned" +}; +#endif +#ifdef PAGECACHE_DEBUG +typedef struct st_pagecache_pin_info +{ + struct st_pagecache_pin_info *next, **prev; + struct st_my_thread_var *thread; +} PAGECACHE_PIN_INFO; +/* + st_pagecache_lock_info structure should be kept in next, prev, thread part + compatible with st_pagecache_pin_info to be compatible in functions. +*/ +typedef struct st_pagecache_lock_info +{ + struct st_pagecache_lock_info *next, **prev; + struct st_my_thread_var *thread; + my_bool write_lock; +} PAGECACHE_LOCK_INFO; +/* service functions */ +void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +{ + if ((node->next= *list)) + node->next->prev= &(node->next); + *list= node; + node->prev= list; +} +void info_unlink(PAGECACHE_PIN_INFO *node) +{ + if ((*node->prev= node->next)) + node->next->prev= node->prev; +} +PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) +{ + register PAGECACHE_PIN_INFO *i= list; + for(; i != 0; i= i->next) + if (i->thread == thread) + return i; + return 0; +} +#endif + +/* page cache block */ +struct st_pagecache_block_link +{ + struct st_pagecache_block_link + *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ + struct st_pagecache_block_link + *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ + struct st_pagecache_hash_link + *hash_link; /* backward ptr to referring hash_link */ + PAGECACHE_WQUEUE + wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ + uint requests; /* number of requests for the block */ + byte *buffer; /* buffer for the block page */ + volatile uint status; /* state of the block */ + volatile uint pins; /* pin counter */ +#ifdef PAGECACHE_DEBUG + PAGECACHE_PIN_INFO *pin_list; + PAGECACHE_LOCK_INFO *lock_list; +#endif + enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ + enum pagecache_page_type type; /* type of the block */ + uint hits_left; /* number of hits left until promotion */ + ulonglong last_hit_time; /* timestamp of the last hit */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ +}; + +#ifdef PAGECACHE_DEBUG +/* debug checks */ +bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("info_check_pin"); + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + if (info) + { + if (mode == PAGECACHE_PIN_LEFT_UNPINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_UNPINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_PIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: PIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + else + { + if (mode == PAGECACHE_PIN_LEFT_PINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_PINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_UNPIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: UNPIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} +bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("info_check_lock"); + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, + thread); + switch(lock) + { + case PAGECACHE_LOCK_LEFT_UNLOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); + if (info) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->U", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_LEFT_READLOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_PIN_LEFT_UNPINNED); + if (info == 0 || info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->R", + (ulong)thread, (ulong)block, (info?'W':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_LEFT_WRITELOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->W", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_READ: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_PIN); + if (info != 0) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->R", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE: + DBUG_ASSERT(pin == PAGECACHE_PIN); + if (info != 0) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->W", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + + case PAGECACHE_LOCK_READ_UNLOCK: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_UNPIN); + if (info == 0 || info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->U", + (ulong)thread, (ulong)block, (info?'W':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE_UNLOCK: + DBUG_ASSERT(pin == PAGECACHE_UNPIN); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE_TO_READ: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED || + pin == PAGECACHE_UNPIN); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + } + DBUG_RETURN(0); +} +#endif + +#define FLUSH_CACHE 2000 /* sort this many blocks at once */ + +static int flush_all_key_blocks(PAGECACHE *pagecache); +#ifdef THREAD +static void link_into_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread); +static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread); +#endif +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); +static void test_key_cache(PAGECACHE *pagecache, + const char *where, my_bool lock); + +#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ + (ulong) (f).file) & (p->hash_entries-1)) +#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1)) + +#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" + +#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG) +#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG +#endif + +#if defined(PAGECACHE_DEBUG_LOG) +static FILE *pagecache_debug_log= NULL; +static void pagecache_debug_print _VARARGS((const char *fmt, ...)); +#define PAGECACHE_DEBUG_OPEN \ + if (!pagecache_debug_log) \ + { \ + pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ + (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ + } + +#define PAGECACHE_DEBUG_CLOSE \ + if (pagecache_debug_log) \ + { \ + fclose(pagecache_debug_log); \ + pagecache_debug_log= 0; \ + } +#else +#define PAGECACHE_DEBUG_OPEN +#define PAGECACHE_DEBUG_CLOSE +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (pagecache_debug_log) \ + fprintf(pagecache_debug_log, "%s: ", l); \ + pagecache_debug_print m; } + +#define KEYCACHE_DBUG_ASSERT(a) \ + { if (! (a) && pagecache_debug_log) \ + fclose(pagecache_debug_log); \ + assert(a); } +#else +#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) +#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) +#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ + +#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) +#ifdef THREAD +static long pagecache_thread_id; +#define KEYCACHE_THREAD_TRACE(l) \ + KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) + +#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ + { struct st_my_thread_var *thread_var= my_thread_var; \ + pagecache_thread_id= thread_var->id; \ + KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } + +#define KEYCACHE_THREAD_TRACE_END(l) \ + KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) +#else /* THREAD */ +#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,("")) +#endif /* THREAD */ +#else +#define KEYCACHE_THREAD_TRACE_BEGIN(l) +#define KEYCACHE_THREAD_TRACE_END(l) +#define KEYCACHE_THREAD_TRACE(l) +#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */ + +#define BLOCK_NUMBER(p, b) \ + ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK))) +#define PAGECACHE_HASH_LINK_NUMBER(p, h) \ + ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \ + sizeof(PAGECACHE_HASH_LINK))) + +#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex); +#else +#define pagecache_pthread_cond_wait pthread_cond_wait +#endif + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); +#define pagecache_pthread_mutex_lock(M) \ +{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_lock(M);} +#define pagecache_pthread_mutex_unlock(M) \ +{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_unlock(M);} +#define pagecache_pthread_cond_signal(M) \ +{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_cond_signal(M);} +#else +#define pagecache_pthread_mutex_lock pthread_mutex_lock +#define pagecache_pthread_mutex_unlock pthread_mutex_unlock +#define pagecache_pthread_cond_signal pthread_cond_signal +#endif /* defined(PAGECACHE_DEBUG) */ + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fwrite() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + type - page type (plain or with LSN) + flags - MYF() flags + + RETURN + 0 - OK + !=0 - Error +*/ +uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + maria_page_no_t pageno, + enum pagecache_page_type type, + myf flags) +{ + DBUG_ENTER("pagecache_fwrite"); + if (type == PAGECACHE_LSN_PAGE) + { + DBUG_PRINT("info", ("Log handler call")); + /* TODO: put here loghandler call */ + } + DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, + (pageno)<<(pagecache->shift), flags)); +} + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fread() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + pageno - page number + flags - MYF() flags +*/ +#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \ + my_pread((filedesc)->file, buffer, pagecache->block_size, \ + (pageno)<<(pagecache->shift), flags) + + +static uint next_power(uint value) +{ + uint old_value= 1; + while (value) + { + old_value= value; + value&= value-1; + } + return (old_value << 1); +} + + +/* + Initialize a page cache + + SYNOPSIS + init_pagecache() + pagecache pointer to a page cache data structure + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the key cache + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + block_size size of block (should be power of 2) + loghandler logfandler pointer to call it in case of + pages with LSN + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + if pagecache->inited != 0 we assume that the key cache + is already initialized. This is for now used by myisamchk, but shouldn't + be something that a program should rely on! + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. + +*/ + +int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size, + LOG_HANDLER *loghandler) +{ + int blocks, hash_links, length; + int error; + DBUG_ENTER("init_key_cache"); + DBUG_ASSERT(block_size >= 512); + + PAGECACHE_DEBUG_OPEN; + if (pagecache->inited && pagecache->disk_blocks > 0) + { + DBUG_PRINT("warning",("key cache already in use")); + DBUG_RETURN(0); + } + + pagecache->loghandler= loghandler; + + pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; + pagecache->global_cache_read= pagecache->global_cache_write= 0; + pagecache->disk_blocks= -1; + if (! pagecache->inited) + { + pagecache->inited= 1; + pagecache->in_init= 0; + pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST); + pagecache->resize_queue.last_thread= NULL; + } + + pagecache->mem_size= use_mem; + pagecache->block_size= block_size; + pagecache->shift= my_bit_log2(block_size); + DBUG_PRINT("info", ("block_size: %u", + block_size)); + + blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + + 2 * sizeof(PAGECACHE_HASH_LINK) + + sizeof(PAGECACHE_HASH_LINK*) * + 5/4 + block_size)); + /* It doesn't make sense to have too few blocks (less than 8) */ + if (blocks >= 8 && pagecache->disk_blocks < 0) + { + for ( ; ; ) + { + /* Set my_hash_entries to the next bigger 2 power */ + if ((pagecache->hash_entries= next_power((uint)blocks)) < + ((uint)blocks) * 5/4) + pagecache->hash_entries<<= 1; + hash_links= 2 * blocks; +#if defined(MAX_THREADS) + if (hash_links < MAX_THREADS + blocks - 1) + hash_links= MAX_THREADS + blocks - 1; +#endif + while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) + + ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))) + + ((ulong) blocks << pagecache->shift) > use_mem) + blocks--; + /* Allocate memory for cache page buffers */ + if ((pagecache->block_mem= + my_large_malloc((ulong) blocks * pagecache->block_size, + MYF(MY_WME)))) + { + /* + Allocate memory for blocks, hash_links and hash entries; + For each block 2 hash links are allocated + */ + if ((pagecache->block_root= + (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, + MYF(0)))) + break; + my_large_free(pagecache->block_mem, MYF(0)); + pagecache->block_mem= 0; + } + if (blocks < 8) + { + my_errno= ENOMEM; + goto err; + } + blocks= blocks / 4*3; + } + pagecache->blocks_unused= (ulong) blocks; + pagecache->disk_blocks= (int) blocks; + pagecache->hash_links= hash_links; + pagecache->hash_root= + (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root + + ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK))); + pagecache->hash_link_root= + (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root + + ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))); + bzero((byte*) pagecache->block_root, + pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK)); + bzero((byte*) pagecache->hash_root, + pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*)); + bzero((byte*) pagecache->hash_link_root, + pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK)); + pagecache->hash_links_used= 0; + pagecache->free_hash_list= NULL; + pagecache->blocks_used= pagecache->blocks_changed= 0; + + pagecache->global_blocks_changed= 0; + pagecache->blocks_available=0; /* For debugging */ + + /* The LRU chain is empty after initialization */ + pagecache->used_last= NULL; + pagecache->used_ins= NULL; + pagecache->free_block_list= NULL; + pagecache->time= 0; + pagecache->warm_blocks= 0; + pagecache->min_warm_blocks= (division_limit ? + blocks * division_limit / 100 + 1 : + (ulong)blocks); + pagecache->age_threshold= (age_threshold ? + blocks * age_threshold / 100 : + (ulong)blocks); + + pagecache->cnt_for_resize_op= 0; + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 1; + + pagecache->waiting_for_hash_link.last_thread= NULL; + pagecache->waiting_for_block.last_thread= NULL; + DBUG_PRINT("exit", + ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ + hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", + pagecache->disk_blocks, pagecache->block_root, + pagecache->hash_entries, pagecache->hash_root, + pagecache->hash_links, pagecache->hash_link_root)); + bzero((gptr) pagecache->changed_blocks, + sizeof(pagecache->changed_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + bzero((gptr) pagecache->file_blocks, + sizeof(pagecache->file_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + } + + pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0; + DBUG_RETURN((uint) pagecache->disk_blocks); + +err: + error= my_errno; + pagecache->disk_blocks= 0; + pagecache->blocks= 0; + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + } + if (pagecache->block_root) + { + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + my_errno= error; + pagecache->can_be_used= 0; + DBUG_RETURN(0); +} + + +/* + Resize a key cache + + SYNOPSIS + resize_pagecache() + pagecache pointer to a page cache data structure + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first compares the memory size parameter + with the key cache value. + + If they differ the function free the the memory allocated for the + old key cache blocks by calling the end_pagecache function and + then rebuilds the key cache with new blocks by calling + init_key_cache. + + The function starts the operation only when all other threads + performing operations with the key cache let her to proceed + (when cnt_for_resize=0). +*/ + +int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold) +{ + int blocks; + struct st_my_thread_var *thread; + PAGECACHE_WQUEUE *wqueue; + DBUG_ENTER("resize_pagecache"); + + if (!pagecache->inited) + DBUG_RETURN(pagecache->disk_blocks); + + if(use_mem == pagecache->mem_size) + { + change_pagecache_param(pagecache, division_limit, age_threshold); + DBUG_RETURN(pagecache->disk_blocks); + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + +#ifdef THREAD + wqueue= &pagecache->resize_queue; + thread= my_thread_var; + link_into_queue(wqueue, thread); + + while (wqueue->last_thread->next != thread) + { + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#endif + + pagecache->resize_in_flush= 1; + if (flush_all_key_blocks(pagecache)) + { + /* TODO: if this happens, we should write a warning in the log file ! */ + pagecache->resize_in_flush= 0; + blocks= 0; + pagecache->can_be_used= 0; + goto finish; + } + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 0; +#ifdef THREAD + while (pagecache->cnt_for_resize_op) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0); +#endif + + end_pagecache(pagecache, 0); /* Don't free mutex */ + /* The following will work even if use_mem is 0 */ + blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, + division_limit, age_threshold, + pagecache->loghandler); + +finish: +#ifdef THREAD + unlink_from_queue(wqueue, thread); + /* Signal for the next resize request to proceeed if any */ + if (wqueue->last_thread) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: signal", + ("thread %ld", wqueue->last_thread->next->id)); + pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend); + } +#endif + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(blocks); +} + + +/* + Increment counter blocking resize key cache operation +*/ +static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) +{ + pagecache->cnt_for_resize_op++; +} + + +/* + Decrement counter blocking resize key cache operation; + Signal the operation to proceed when counter becomes equal zero +*/ +static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) +{ +#ifdef THREAD + struct st_my_thread_var *last_thread; + if (!--pagecache->cnt_for_resize_op && + (last_thread= pagecache->resize_queue.last_thread)) + { + KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", + ("thread %ld", last_thread->next->id)); + pagecache_pthread_cond_signal(&last_thread->next->suspend); + } +#else + pagecache->cnt_for_resize_op--; +#endif +} + +/* + Change the page cache parameters + + SYNOPSIS + change_pagecache_param() + pagecache pointer to a page cache data structure + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + none + + NOTES. + Presently the function resets the key cache parameters + concerning midpoint insertion strategy - division_limit and + age_threshold. +*/ + +void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold) +{ + DBUG_ENTER("change_pagecache_param"); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (division_limit) + pagecache->min_warm_blocks= (pagecache->disk_blocks * + division_limit / 100 + 1); + if (age_threshold) + pagecache->age_threshold= (pagecache->disk_blocks * + age_threshold / 100); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_VOID_RETURN; +} + + +/* + Remove page cache from memory + + SYNOPSIS + end_pagecache() + pagecache page cache handle + cleanup Complete free (Free also mutex for key cache) + + RETURN VALUE + none +*/ + +void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) +{ + DBUG_ENTER("end_pagecache"); + DBUG_PRINT("enter", ("key_cache: 0x%lx", pagecache)); + + if (!pagecache->inited) + DBUG_VOID_RETURN; + + if (pagecache->disk_blocks > 0) + { + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + pagecache->disk_blocks= -1; + /* Reset blocks_changed to be safe if flush_all_key_blocks is called */ + pagecache->blocks_changed= 0; + } + + DBUG_PRINT("status", ("used: %d changed: %d w_requests: %lu " + "writes: %lu r_requests: %lu reads: %lu", + pagecache->blocks_used, pagecache->global_blocks_changed, + (ulong) pagecache->global_cache_w_requests, + (ulong) pagecache->global_cache_write, + (ulong) pagecache->global_cache_r_requests, + (ulong) pagecache->global_cache_read)); + + if (cleanup) + { + pthread_mutex_destroy(&pagecache->cache_lock); + pagecache->inited= pagecache->can_be_used= 0; + PAGECACHE_DEBUG_CLOSE; + } + DBUG_VOID_RETURN; +} /* end_pagecache */ + + +#ifdef THREAD +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +static void link_into_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (! (last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id)); + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev=thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. +*/ + +static inline void add_to_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (! (last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + realease_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. +*/ + +static void release_queue(PAGECACHE_WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread=next; + KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + next=thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} +#endif + + +/* + Unlink a block from the chain of dirty/clean blocks +*/ + +static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block) +{ + if (block->next_changed) + block->next_changed->prev_changed= block->prev_changed; + *block->prev_changed= block->next_changed; +} + + +/* + Link a block into the chain of dirty/clean blocks +*/ + +static inline void link_changed(PAGECACHE_BLOCK_LINK *block, + PAGECACHE_BLOCK_LINK **phead) +{ + block->prev_changed= phead; + if ((block->next_changed= *phead)) + (*phead)->prev_changed= &block->next_changed; + *phead= block; +} + + +/* + Unlink a block from the chain of dirty/clean blocks, if it's asked for, + and link it to the chain of clean blocks for the specified file +*/ + +static void link_to_file_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + PAGECACHE_FILE *file, my_bool unlink) +{ + if (unlink) + unlink_changed(block); + link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); + if (block->status & BLOCK_CHANGED) + { + block->status&= ~BLOCK_CHANGED; + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + } +} + + +/* + Unlink a block from the chain of clean blocks for the specified + file and link it to the chain of dirty blocks for this file +*/ + +static inline void link_to_changed_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + unlink_changed(block); + link_changed(block, + &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]); + block->status|=BLOCK_CHANGED; + pagecache->blocks_changed++; + pagecache->global_blocks_changed++; +} + + +/* + Link a block to the LRU chain at the beginning or at the end of + one of two parts. + + SYNOPSIS + link_block() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + hot <-> to link the block into the hot subchain + at_end <-> to link the block at the end of the subchain + + RETURN VALUE + none + + NOTES. + The LRU chain is represented by a curcular list of block structures. + The list is double-linked of the type (**prev,*next) type. + The LRU chain is divided into two parts - hot and warm. + There are two pointers to access the last blocks of these two + parts. The beginning of the warm part follows right after the + end of the hot part. + Only blocks of the warm part can be used for replacement. + The first block from the beginning of this subchain is always + taken for eviction (pagecache->last_used->next) + + LRU chain: +------+ H O T +------+ + +----| end |----...<----| beg |----+ + | +------+last +------+ | + v<-link in latest hot (new end) | + | link in latest warm (new end)->^ + | +------+ W A R M +------+ | + +----| beg |---->...----| end |----+ + +------+ +------+ins + first for eviction +*/ + +static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + my_bool hot, my_bool at_end) +{ + PAGECACHE_BLOCK_LINK *ins; + PAGECACHE_BLOCK_LINK **pins; + + KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); +#ifdef THREAD + if (!hot && pagecache->waiting_for_block.last_thread) + { + /* Signal that in the LRU warm sub-chain an available block has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_block.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_HASH_LINK *hash_link= + (PAGECACHE_HASH_LINK *) first_thread->opt_info; + struct st_my_thread_var *thread; + do + { + thread= next_thread; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) + { + KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + unlink_from_queue(&pagecache->waiting_for_block, thread); + block->requests++; + } + } + while (thread != last_thread); + hash_link->block= block; + KEYCACHE_THREAD_TRACE("link_block: after signaling"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_PRINT("link_block", + ("linked,unlinked block %u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); +#endif + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); + /* Condition not transformed using DeMorgan, to keep the text identical */ +#endif /* THREAD */ + pins= hot ? &pagecache->used_ins : &pagecache->used_last; + ins= *pins; + if (ins) + { + ins->next_used->prev_used= &block->next_used; + block->next_used= ins->next_used; + block->prev_used= &ins->next_used; + ins->next_used= block; + if (at_end) + *pins= block; + } + else + { + /* The LRU chain is empty */ + pagecache->used_last= pagecache->used_ins= block->next_used= block; + block->prev_used= &block->next_used; + } + KEYCACHE_THREAD_TRACE("link_block"); +#if defined(PAGECACHE_DEBUG) + pagecache->blocks_available++; + KEYCACHE_DBUG_PRINT("link_block", + ("linked block %u:%1u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), at_end, block->status, + block->requests, pagecache->blocks_available)); + KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= + pagecache->blocks_used); +#endif +} + + +/* + Unlink a block from the LRU chain + + SYNOPSIS + unlink_block() + pagecache pointer to a page cache data structure + block pointer to the block to unlink from the LRU chain + + RETURN VALUE + none + + NOTES. + See NOTES for link_block +*/ + +static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + if (block->next_used == block) + /* The list contains only one member */ + pagecache->used_last= pagecache->used_ins= NULL; + else + { + block->next_used->prev_used= block->prev_used; + *block->prev_used= block->next_used; + if (pagecache->used_last == block) + pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + if (pagecache->used_ins == block) + pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + } + block->next_used= NULL; + + KEYCACHE_THREAD_TRACE("unlink_block"); +#if defined(PAGECACHE_DEBUG) + pagecache->blocks_available--; + KEYCACHE_DBUG_PRINT("unlink_block", + ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u", + (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); + BLOCK_INFO(block); + KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0); +#endif +} + + +/* + Register requests for a block +*/ +static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + int count) +{ + DBUG_ENTER("reg_requests"); + DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", + (ulong)block, BLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + BLOCK_INFO(block); + if (! block->requests) + /* First request for the block unlinks it */ + unlink_block(pagecache, block); + block->requests+=count; + DBUG_VOID_RETURN; +} + + +/* + Unregister request for a block + linking it to the LRU chain if it's the last request + + SYNOPSIS + unreg_request() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + at_end <-> to link the block at the end of the LRU chain + + RETURN VALUE + none + + NOTES. + Every linking to the LRU chain decrements by one a special block + counter (if it's positive). If the at_end parameter is TRUE the block is + added either at the end of warm sub-chain or at the end of hot sub-chain. + It is added to the hot subchain if its counter is zero and number of + blocks in warm sub-chain is not less than some low limit (determined by + the division_limit parameter). Otherwise the block is added to the warm + sub-chain. If the at_end parameter is FALSE the block is always added + at beginning of the warm sub-chain. + Thus a warm block can be promoted to the hot sub-chain when its counter + becomes zero for the first time. + At the same time the block at the very beginning of the hot subchain + might be moved to the beginning of the warm subchain if it stays untouched + for a too long time (this time is determined by parameter age_threshold). +*/ + +static void unreg_request(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, int at_end) +{ + DBUG_ENTER("unreg_request"); + DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", + (ulong)block, BLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + BLOCK_INFO(block); + if (! --block->requests) + { + my_bool hot; + if (block->hits_left) + block->hits_left--; + hot= !block->hits_left && at_end && + pagecache->warm_blocks > pagecache->min_warm_blocks; + if (hot) + { + if (block->temperature == BLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= BLOCK_HOT; + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + pagecache->warm_blocks)); + } + link_block(pagecache, block, hot, (my_bool)at_end); + block->last_hit_time= pagecache->time; + pagecache->time++; + + block= pagecache->used_ins; + /* Check if we should link a hot block to the warm block */ + if (block && pagecache->time - block->last_hit_time > + pagecache->age_threshold) + { + unlink_block(pagecache, block); + link_block(pagecache, block, 0, 0); + if (block->temperature != BLOCK_WARM) + { + pagecache->warm_blocks++; + block->temperature= BLOCK_WARM; + } + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + pagecache->warm_blocks)); + } + } + DBUG_VOID_RETURN; +} + +/* + Remove a reader of the page in block +*/ + +static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) +{ + if (! --block->hash_link->requests && block->condvar) + pagecache_pthread_cond_signal(block->condvar); +} + + +/* + Wait until the last reader of the page in block + signals on its termination +*/ + +static inline void wait_for_readers(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + while (block->hash_link->requests) + { + KEYCACHE_DBUG_PRINT("wait_for_readers: wait", + ("suspend thread %ld block %u", + thread->id, BLOCK_NUMBER(pagecache, block))); + block->condvar= &thread->suspend; + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + block->condvar= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0); +#endif +} + + +/* + Add a hash link to a bucket in the hash_table +*/ + +static inline void link_hash(PAGECACHE_HASH_LINK **start, + PAGECACHE_HASH_LINK *hash_link) +{ + if (*start) + (*start)->prev= &hash_link->next; + hash_link->next= *start; + hash_link->prev= start; + *start= hash_link; +} + + +/* + Remove a hash link from the hash table +*/ + +static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) +{ + KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", + (uint) hash_link->file.file, (ulong) hash_link->pageno, + hash_link->requests)); + KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); + if ((*hash_link->prev= hash_link->next)) + hash_link->next->prev= hash_link->prev; + hash_link->block= NULL; +#ifdef THREAD + if (pagecache->waiting_for_hash_link.last_thread) + { + /* Signal that a free hash link has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_hash_link.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info); + struct st_my_thread_var *thread; + + hash_link->file= first_page->file; + hash_link->pageno= first_page->pageno; + do + { + PAGECACHE_PAGE *page; + thread= next_thread; + page= (PAGECACHE_PAGE *) thread->opt_info; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if (page->file.file == hash_link->file.file && + page->pageno == hash_link->pageno) + { + KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + unlink_from_queue(&pagecache->waiting_for_hash_link, thread); + } + } + while (thread != last_thread); + link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache, + hash_link->file, + hash_link->pageno)], + hash_link); + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread)); +#endif /* THREAD */ + hash_link->next= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link; +} +/* + Get the hash link for the page if it is inthe cache +*/ + +static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + PAGECACHE_HASH_LINK ***start) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; +#if defined(PAGECACHE_DEBUG) + int cnt; +#endif + DBUG_ENTER("get_present_hash_link"); + + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + + /* + Find the bucket in the hash table for the pair (file, pageno); + start contains the head of the bucket list, + hash_link points to the first member of the list + */ + hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache, + *file, pageno)]); +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + /* Look for an element for the pair (file, pageno) in the bucket chain */ + while (hash_link && + (hash_link->pageno != pageno || + hash_link->file.file != file->file)) + { + hash_link= hash_link->next; +#if defined(PAGECACHE_DEBUG) + cnt++; + if (! (cnt <= pagecache->hash_links_used)) + { + int i; + for (i=0, hash_link= **start ; + i < cnt ; i++, hash_link= hash_link->next) + { + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) hash_link->file.file, (ulong) hash_link->pageno)); + } + } + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); +#endif + } + DBUG_RETURN(hash_link); +} + + +/* + Get the hash link for a page +*/ + +static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_HASH_LINK **start; + PAGECACHE_PAGE page; + + KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + +restart: + /* try to find the page in the cache */ + hash_link= get_present_hash_link(pagecache, file, pageno, + &start); + if (! hash_link) + { + /* There is no hash link in the hash table for the pair (file, pageno) */ + if (pagecache->free_hash_list) + { + hash_link= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link->next; + } + else if (pagecache->hash_links_used < pagecache->hash_links) + { + hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++]; + } + else + { +#ifdef THREAD + /* Wait for a free hash link */ + struct st_my_thread_var *thread= my_thread_var; + KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); + page.file= *file; + page.pageno= pageno; + thread->opt_info= (void *) &page; + link_into_queue(&pagecache->waiting_for_hash_link, thread); + KEYCACHE_DBUG_PRINT("get_hash_link: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + thread->opt_info= NULL; +#else + KEYCACHE_DBUG_ASSERT(0); +#endif + goto restart; + } + hash_link->file= *file; + hash_link->pageno= pageno; + link_hash(start, hash_link); + } + /* Register the request for the page */ + hash_link->requests++; + + return hash_link; +} + + +/* + Get a block for the file page requested by a pagecache read/write operation; + If the page is not in the cache return a free block, if there is none + return the lru block after saving its buffer if the page is dirty. + + SYNOPSIS + + find_key_block() + pagecache pointer to a page cache data structure + file handler for the file to read page from + pageno number of the page in the file + init_hits_left how initialize the block counter for the page + wrmode <-> get for writing + reg_req Register request to thye page + page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} + + RETURN VALUE + Pointer to the found block if successful, 0 - otherwise + + NOTES. + For the page from file positioned at pageno the function checks whether + the page is in the key cache specified by the first parameter. + If this is the case it immediately returns the block. + If not, the function first chooses a block for this page. If there is + no not used blocks in the key cache yet, the function takes the block + at the very beginning of the warm sub-chain. It saves the page in that + block if it's dirty before returning the pointer to it. + The function returns in the page_st parameter the following values: + PAGE_READ - if page already in the block, + PAGE_TO_BE_READ - if it is to be read yet by the current thread + WAIT_TO_BE_READ - if it is to be read by another thread + If an error occurs THE BLOCK_ERROR bit is set in the block status. + It might happen that there are no blocks in LRU chain (in warm part) - + all blocks are unlinked for some read/write operations. Then the function + waits until first of this operations links any block back. +*/ + +static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) +{ + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_BLOCK_LINK *block; + int error= 0; + int page_status; + + DBUG_ENTER("find_key_block"); + KEYCACHE_THREAD_TRACE("find_key_block:begin"); + DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, (uint) wrmode)); + KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, + (uint) wrmode)); +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of find_key_block", 0);); +#endif + +restart: + /* Find the hash link for the requested page (file, pageno) */ + hash_link= get_hash_link(pagecache, file, pageno); + + page_status= -1; + if ((block= hash_link->block) && + block->hash_link == hash_link && (block->status & BLOCK_READ)) + page_status= PAGE_READ; + + if (wrmode && pagecache->resize_in_flush) + { + /* This is a write request during the flush phase of a resize operation */ + + if (page_status != PAGE_READ) + { + /* We don't need the page in the cache: we are going to write on disk */ + hash_link->requests--; + unlink_hash(pagecache, hash_link); + return 0; + } + if (!(block->status & BLOCK_IN_FLUSH)) + { + hash_link->requests--; + /* + Remove block to invalidate the page in the block buffer + as we are going to write directly on disk. + Although we have an exlusive lock for the updated key part + the control can be yieded by the current thread as we might + have unfinished readers of other key parts in the block + buffer. Still we are guaranteed not to have any readers + of the key part we are writing into until the block is + removed from the cache as we set the BLOCK_REASSIGNED + flag (see the code below that handles reading requests). + */ + free_block(pagecache, block); + return 0; + } + /* Wait intil the page is flushed on disk */ + hash_link->requests--; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* + Given the use of "resize_in_flush", it seems impossible + that this whole branch is ever entered in single-threaded case + because "(wrmode && pagecache->resize_in_flush)" cannot be true. + TODO: Check this, and then put the whole branch into the + "#ifdef THREAD" guard. + */ +#endif + } + /* Invalidate page in the block if it has not been done yet */ + if (block->status) + free_block(pagecache, block); + return 0; + } + + if (page_status == PAGE_READ && + (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED))) + { + /* This is a request for a page to be removed from cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page in block %u " + "wrmode: %d block->status: %d", + BLOCK_NUMBER(pagecache, block), wrmode, + block->status)); + /* + Only reading requests can proceed until the old dirty page is flushed, + all others are to be suspended, then resubmitted + */ + if (!wrmode && !(block->status & BLOCK_REASSIGNED)) + { + if (reg_req) + reg_requests(pagecache, block, 1); + } + else + { + hash_link->requests--; + KEYCACHE_DBUG_PRINT("find_key_block", + ("request waiting for old page to be saved")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into the queue of those waiting for the old page */ + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + /* Wait until the request can be resubmitted */ + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page resubmitted")); + /* Resubmit the request */ + goto restart; + } + } + else + { + /* This is a request for a new page or for a page not to be removed */ + if (! block) + { + /* No block is assigned for the page yet */ + if (pagecache->blocks_unused) + { + if (pagecache->free_block_list) + { + /* There is a block in the free list. */ + block= pagecache->free_block_list; + pagecache->free_block_list= block->next_used; + block->next_used= NULL; + } + else + { + /* There are some never used blocks, take first of them */ + block= &pagecache->block_root[pagecache->blocks_used]; + block->buffer= ADD_TO_PTR(pagecache->block_mem, + ((ulong) pagecache->blocks_used* + pagecache->block_size), + byte*); + pagecache->blocks_used++; + } + pagecache->blocks_unused--; + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->requests= 1; + block->temperature= BLOCK_COLD; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + link_to_file_list(pagecache, block, file, 0); + block->hash_link= hash_link; + hash_link->block= block; + page_status= PAGE_TO_BE_READ; + KEYCACHE_DBUG_PRINT("find_key_block", + ("got free or never used block %u", + BLOCK_NUMBER(pagecache, block))); + } + else + { + /* There are no never used blocks, use a block from the LRU chain */ + + /* + Wait until a new block is added to the LRU chain; + several threads might wait here for the same page, + all of them must get the same block + */ + +#ifdef THREAD + if (! pagecache->used_last) + { + struct st_my_thread_var *thread= my_thread_var; + thread->opt_info= (void *) hash_link; + link_into_queue(&pagecache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); + thread->opt_info= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->used_last); +#endif + block= hash_link->block; + if (! block) + { + /* + Take the first block from the LRU chain + unlinking it from the chain + */ + block= pagecache->used_last->next_used; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + if (reg_req) + reg_requests(pagecache, block,1); + hash_link->block= block; + } + + if (block->hash_link != hash_link && + ! (block->status & BLOCK_IN_SWITCH) ) + { + /* this is a primary request for a new page */ + block->status|= BLOCK_IN_SWITCH; + + KEYCACHE_DBUG_PRINT("find_key_block", + ("got block %u for new page", + BLOCK_NUMBER(pagecache, block))); + + if (block->status & BLOCK_CHANGED) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 0); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + } + + block->status|= BLOCK_REASSIGNED; + if (block->hash_link) + { + /* + Wait until all pending read requests + for this page are executed + (we could have avoided this waiting, if we had read + a page in the cache in a sweep, without yielding control) + */ + wait_for_readers(pagecache, block); + + /* Remove the hash link for this page from the hash table */ + unlink_hash(pagecache, block->hash_link); + /* All pending requests for this page must be resubmitted */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); + } + link_to_file_list(pagecache, block, file, + (my_bool)(block->hash_link ? 1 : 0)); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= error? BLOCK_ERROR : 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->hash_link= hash_link; + page_status= PAGE_TO_BE_READ; + + KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); + KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); + } + else + { + /* This is for secondary requests for a new page only */ + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + pagecache->global_cache_read++; + } + else + { + if (reg_req) + reg_requests(pagecache, block, 1); + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + + KEYCACHE_DBUG_ASSERT(page_status != -1); + *page_st=page_status; + DBUG_PRINT("info", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + (ulong) block, (uint) file->file, + (ulong) pageno, block->status, (uint) page_status)); + KEYCACHE_DBUG_PRINT("find_key_block", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + (ulong) block, + (uint) file->file, (ulong) pageno, block->status, + (uint) page_status)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of find_key_block",0);); +#endif + KEYCACHE_THREAD_TRACE("find_key_block:end"); + DBUG_RETURN(block); +} + + +void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_add_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + block->pins++; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= + (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0)); + info->thread= my_thread_var; + info_link(&block->pin_list, info); + } +#endif + DBUG_VOID_RETURN; +} + +void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_remove_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + DBUG_ASSERT(block->pins > 0); + block->pins--; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink(info); + my_free((gptr) info, MYF(0)); + } +#endif + DBUG_VOID_RETURN; +} +#ifdef PAGECACHE_DEBUG +void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); + info->thread= my_thread_var; + info->write_lock= wl; + info_link((PAGECACHE_PIN_INFO **)&block->lock_list, + (PAGECACHE_PIN_INFO *)info); +} +void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink((PAGECACHE_PIN_INFO *)info); + my_free((gptr)info, MYF(0)); +} +void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0 && info->write_lock != wl); + info->write_lock= wl; +} +#else +#define pagecache_add_lock(B,W) +#define pagecache_remove_lock(B) +#define pagecache_change_lock(B,W) +#endif + +/* + Put on the block "update" type lock + + SYNOPSIS + pagecache_lock_block() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK + 1 - Try to lock the block failed +*/ + +my_bool pagecache_lock_block(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_lock_block"); + BLOCK_INFO(block); + if (block->status & BLOCK_WRLOCK) + { + DBUG_PRINT("info", ("fail to lock, waiting...")); + /* Lock failed we will wait */ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); + dec_counter_for_resize_op(pagecache); + do + { + KEYCACHE_DBUG_PRINT("pagecache_lock_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + DBUG_ASSERT(0); +#endif + BLOCK_INFO(block); + DBUG_RETURN(1); + } + /* we are doing it by global cache mutex protectio, so it is OK */ + block->status|= BLOCK_WRLOCK; + DBUG_RETURN(0); +} + +void pagecache_ulock_block(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_ulock_block"); + BLOCK_INFO(block); + DBUG_ASSERT(block->status & BLOCK_WRLOCK); + block->status&= ~BLOCK_WRLOCK; +#ifdef THREAD + /* release all threads waiting for write lock */ + if (block->wqueue[COND_FOR_WRLOCK].last_thread) + release_queue(&block->wqueue[COND_FOR_WRLOCK]); +#endif + BLOCK_INFO(block); + DBUG_VOID_RETURN; +} + +/* + Try to lock/uplock and pin/unpin the block + + SYNOPSIS + pagecache_make_lock_and_pin() + pagecache pointer to a page cache data structure + block the block to work with + lock lock change mode + pin pinchange mode + + RETURN + 0 - OK + 1 - Try to lock the block failed +*/ + +my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + DBUG_ENTER("pagecache_make_lock_and_pin"); + DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s", + (ulong)block, BLOCK_NUMBER(pagecache, block), + ((block->status & BLOCK_WRLOCK)?'Y':'N'), + block->pins, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + BLOCK_INFO(block); +#ifdef PAGECACHE_DEBUG + DBUG_ASSERT(info_check_pin(block, pin) == 0 && + info_check_lock(block, lock, pin) == 0); +#endif + switch (lock) + { + case PAGECACHE_LOCK_WRITE: /* free -> write */ + /* Writelock and pin the buffer */ + if (pagecache_lock_block(pagecache, block)) + { + DBUG_PRINT("info", ("restart")); + /* in case of fail pagecache_lock_block unlock cache */ + DBUG_RETURN(1); + } + /* The cache is locked so nothing afraid off */ + pagecache_add_pin(block); + pagecache_add_lock(block, 1); + break; + case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ + case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ + /* + Removes writelog and puts read lock (which is nothing in our + implementation) + */ + pagecache_ulock_block(block); + case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ + case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ +#ifndef DBUG_OFF + if (pin == PAGECACHE_UNPIN) + { + pagecache_remove_pin(block); + } +#endif +#ifdef PAGECACHE_DEBUG + if (lock == PAGECACHE_LOCK_WRITE_TO_READ) + { + pagecache_change_lock(block, 0); + } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK) + { + pagecache_remove_lock(block); + } +#endif + break; + case PAGECACHE_LOCK_READ: /* free -> read */ +#ifndef DBUG_OFF + if (pin == PAGECACHE_PIN) + { + /* The cache is locked so nothing afraid off */ + pagecache_add_pin(block); + } + pagecache_add_lock(block, 0); + break; +#endif + case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ + case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ + break; /* do nothing */ + default: + DBUG_ASSERT(0); /* Never should happened */ + } + + BLOCK_INFO(block); + DBUG_RETURN(0); +} + + +/* + Read into a key cache block buffer from disk. + + SYNOPSIS + + read_block() + pagecache pointer to a page cache data structure + block block to which buffer the data is to be read + primary <-> the current thread will read the data + + RETURN VALUE + None + + NOTES. + The function either reads a page data from file to the block buffer, + or waits until another thread reads it. What page to read is determined + by a block parameter - reference to a hash link for this page. + If an error occurs THE BLOCK_ERROR bit is set in the block status. +*/ + +static void read_block(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + my_bool primary) +{ + uint got_length; + + /* On entry cache_lock is locked */ + + KEYCACHE_THREAD_TRACE("read_block"); + if (primary) + { + /* + This code is executed only by threads + that submitted primary requests + */ + + KEYCACHE_DBUG_PRINT("read_block", + ("page to be read by primary request")); + + /* Page is not in buffer yet, is to be read from disk */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + Here other threads may step in and register as secondary readers. + They will register in block->wqueue[COND_FOR_REQUESTED]. + */ + got_length= pagecache_fread(pagecache, &block->hash_link->file, + block->buffer, + block->hash_link->pageno, MYF(0)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (got_length < pagecache->block_size) + block->status|= BLOCK_ERROR; + else + block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); + + KEYCACHE_DBUG_PRINT("read_block", + ("primary request: new page in cache")); + /* Signal that all pending requests for this page now can be processed */ + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + release_queue(&block->wqueue[COND_FOR_REQUESTED]); + } + else + { + /* + This code is executed only by threads + that submitted secondary requests + */ + KEYCACHE_DBUG_PRINT("read_block", + ("secondary request waiting for new page to be read")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into a queue and wait until it can be processed */ + add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); + do + { + KEYCACHE_DBUG_PRINT("read_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + KEYCACHE_DBUG_PRINT("read_block", + ("secondary request: new page in cache")); + } +} + + +/* + Unlock/unpin page and put LSN stamp if it need + + SYNOPSIS + pagecache_unlock_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + pin pin page + stamp_this_page put LSN stamp on the page + first_REDO_LSN_for_page +*/ + +void pagecache_unlock_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unlock_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* we do not allow any lock/pin increasing here */ + DBUG_ASSERT(pin != PAGECACHE_PIN && + lock != PAGECACHE_LOCK_READ && + lock != PAGECACHE_LOCK_WRITE); + if (pin == PAGECACHE_PIN_LEFT_UNPINNED && + lock == PAGECACHE_LOCK_READ_UNLOCK) + { +#ifndef DBUG_OFF + if ( +#endif + /* block do not need here so we do not provide it */ + pagecache_make_lock_and_pin(pagecache, 0, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + DBUG_VOID_RETURN; + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + if (stamp_this_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && + pin == PAGECACHE_UNPIN); + /* TODO: insert LSN writing code */ + } + +#ifndef DBUG_OFF + if ( +#endif + pagecache_make_lock_and_pin(pagecache, block, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + + SYNOPSIS + pagecache_unpin_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file +*/ + +void pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unpin_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu", + (uint) file->file, (ulong) pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unlock() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) + lock lock change + pin pin page + stamp_this_page put LSN stamp on the page + first_REDO_LSN_for_page +*/ + +void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unlock"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu l%s p%s", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* we do not allow any lock/pin increasing here */ + DBUG_ASSERT(pin != PAGECACHE_PIN && + lock != PAGECACHE_LOCK_READ && + lock != PAGECACHE_LOCK_WRITE); + if (pin == PAGECACHE_PIN_LEFT_UNPINNED && + lock == PAGECACHE_LOCK_READ_UNLOCK) + { +#ifndef DBUG_OFF + if ( +#endif + /* block do not need here so we do not provide it */ + pagecache_make_lock_and_pin(pagecache, 0, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + DBUG_VOID_RETURN; + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + if (stamp_this_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && + pin == PAGECACHE_UNPIN); + /* TODO: insert LSN writing code */ + } + +#ifndef DBUG_OFF + if ( +#endif + pagecache_make_lock_and_pin(pagecache, block, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unpin_page() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) +*/ + +void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unpin"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a cached file into a buffer; + + SYNOPSIS + pagecache_read() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + link link to the page if we pin it + + RETURN VALUE + Returns address from where the data is placed if sucessful, 0 - otherwise. + + NOTES. + + The function ensures that a block of data of size length from file + positioned at pageno is in the buffers for some key cache blocks. + Then the function copies the data into the buffer buff. + + Pin will be choosen according to lock parameter (see lock_to_pin) +*/ +static enum pagecache_page_pin lock_to_pin[]= +{ + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +byte *pagecache_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + PAGECACHE_PAGE_LINK fake_link; + DBUG_ENTER("page_cache_read"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + + if (!link) + link= &fake_link; + else + *link= 0; + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + uint status; + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_r_requests++; + block= find_key_block(pagecache, file, pageno, level, 0, + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 0 : 1), + &page_st); + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, + (my_bool)(page_st == PAGE_TO_BE_READ)); + } + + if (! ((status= block->status) & BLOCK_ERROR)) + { +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); +#endif + + DBUG_ASSERT((pagecache->block_size & 511) == 0); + /* Copy data from the cache buffer */ + bmove512(buff, block->buffer, pagecache->block_size); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_lock(&pagecache->cache_lock); +#endif + } + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + if (status & BLOCK_ERROR) + DBUG_RETURN((byte *) 0); + + DBUG_RETURN(buff); + } + +no_key_cache: /* Key cache is not used */ + + /* We can't use mutex here as the key cache may not be initialized */ + pagecache->global_cache_r_requests++; + pagecache->global_cache_read++; + if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP))) + error= 1; + DBUG_RETURN(error ? (byte*) 0 : buff); +} + + +/* + Delete page from the buffer + + SYNOPSIS + pagecache_delete_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + flush flush page if it is dirty + + RETURN VALUE + 0 - deleted or was not present at all + 1 - error + + NOTES. + lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked + before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete) +*/ +my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + DBUG_ENTER("pagecache_delete_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || + lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK **unused_start, *link; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + goto end; + + inc_counter_for_resize_op(pagecache); + link= get_present_hash_link(pagecache, file, pageno, &unused_start); + if (!link) + { + DBUG_PRINT("info", ("There is no fuch page in the cache")); + DBUG_RETURN(0); + } + block= link->block; + DBUG_ASSERT(block != 0); + if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + + if (block->status & BLOCK_CHANGED && flush) + { + if (flush) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + goto err; + } + } + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + + } + /* Cache is locked, so we can relese page before freeing it */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + if (pin == PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + free_block(pagecache, block); + +err: + dec_counter_for_resize_op(pagecache); +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + } + + DBUG_RETURN(error); +} + + +/* + Write a buffer into a cached file. + + SYNOPSIS + + pagecache_write() + pagecache pointer to a page cache data structure + file handler for the file to write data to + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + pin pin page + write_mode how to write page + link link to the page if we pin it + + RETURN VALUE + 0 if a success, 1 - otherwise. +*/ + +struct write_lock_change +{ + int need_lock_change; + enum pagecache_page_lock new_lock; + enum pagecache_page_lock unlock_lock; +}; + +static struct write_lock_change write_lock_change_table[]= +{ + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/, + {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +struct write_pin_change +{ + enum pagecache_page_pin new_pin; + enum pagecache_page_pin unlock_pin; +}; + +static struct write_pin_change write_pin_change_table[]= +{ + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/, + {PAGECACHE_PIN, + PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/, + {PAGECACHE_PIN, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/, + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/ +}; + +my_bool pagecache_write(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link) +{ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_PAGE_LINK fake_link; + int error= 0; + int need_lock_change= write_lock_change_table[lock].need_lock_change; + DBUG_ENTER("pagecache_write"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s m%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin], + page_cache_page_write_mode_str[write_mode])); + DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED && + lock != PAGECACHE_LOCK_READ_UNLOCK); + if (!link) + link= &fake_link; + else + *link= 0; + + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* we allow direct write if wwe do not use long term lockings */ + DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); + /* Force writing from buff into disk */ + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + DBUG_RETURN(1); + } +restart: + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of key_cache_write", 1);); +#endif + + if (pagecache->can_be_used) + { + /* Key cache is used */ + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_w_requests++; + block= find_key_block(pagecache, file, pageno, level, + (write_mode == PAGECACHE_WRITE_DONE ? 0 : 1), + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 0 : 1), + &page_st); + if (!block) + { + DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); + /* It happens only for requests submitted during resize operation */ + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* Write to the disk key cache is in resize at the moment*/ + goto no_key_cache; + } + + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + + if (pagecache_make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? + write_pin_change_table[pin].new_pin : + pin))) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + + + if (write_mode == PAGECACHE_WRITE_DONE) + { + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + /* Copy data from buff */ + bmove512(block->buffer, buff, pagecache->block_size); + block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); + KEYCACHE_DBUG_PRINT("key_cache_insert", + ("primary request: new page in cache")); + /* Signal that all pending requests for this now can be processed. */ + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + release_queue(&block->wqueue[COND_FOR_REQUESTED]); + } + } + else + { + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* buff has been written to disk at start */ + if (block->status & BLOCK_CHANGED) + link_to_file_list(pagecache, block, &block->hash_link->file, 1); + } + else + { + if (! (block->status & BLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + } + if (! (block->status & BLOCK_ERROR)) + { + bmove512(block->buffer, buff, pagecache->block_size); + } + block->status|= BLOCK_READ; + } + + + if (need_lock_change) + { +#ifndef DBUG_OFF + int rc= +#endif + pagecache_make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); +#endif + } + + /* Unregister the request */ + block->hash_link->requests--; + if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + + if (block->status & BLOCK_ERROR) + error= 1; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + goto end; + } + +no_key_cache: + /* Key cache is not used */ + if (write_mode == PAGECACHE_WRITE_DELAY) + { + pagecache->global_cache_w_requests++; + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + } + +end: +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("exec", + test_key_cache(pagecache, "end of key_cache_write", 1);); +#endif + DBUG_RETURN(error); +} + + +/* + Free block: remove reference to it from hash table, + remove it from the chain file of dirty/clean blocks + and add it to the free list. +*/ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block %u to be freed, hash_link %p", + BLOCK_NUMBER(pagecache, block), block->hash_link)); + if (block->hash_link) + { + /* + While waiting for readers to finish, new readers might request the + block. But since we set block->status|= BLOCK_REASSIGNED, they + will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled + later. + */ + block->status|= BLOCK_REASSIGNED; + wait_for_readers(pagecache, block); + unlink_hash(pagecache, block->hash_link); + } + + unlink_changed(block); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block is freed")); + unreg_request(pagecache, block, 0); + block->hash_link= NULL; + + /* Remove the free block from the LRU ring. */ + unlink_block(pagecache, block); + if (block->temperature == BLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= BLOCK_COLD; + /* Insert the free block in the free list. */ + block->next_used= pagecache->free_block_list; + pagecache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + pagecache->blocks_unused++; + + /* All pending requests for this page must be resubmitted. */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); +} + + +static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b) +{ + return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 : + ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0); +} + + +/* + Flush a portion of changed blocks to disk, + free used blocks if requested +*/ + +static int flush_cached_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + PAGECACHE_BLOCK_LINK **cache, + PAGECACHE_BLOCK_LINK **end, + enum flush_type type) +{ + int error; + int last_errno= 0; + uint count= (uint) (end-cache); + DBUG_ENTER("flush_cached_blocks"); + + /* Don't lock the cache during the flush */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH + we are guarunteed no thread will change them + */ + qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + for (; cache != end; cache++) + { + PAGECACHE_BLOCK_LINK *block= *cache; + + if (block->pins) + { + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + last_errno= -1; + unreg_request(pagecache, block, 1); + continue; + } + /* if the block is not pinned then it is not write locked */ + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); +#ifndef DBUG_OFF + { + int rc= +#endif + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); + } +#endif + + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("block %u (0x%lx) pins: %u", + BLOCK_NUMBER(pagecache, block), (ulong)block, + block->pins)); + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + + pagecache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + if (!last_errno) + last_errno= errno ? errno : -1; + } + /* + Let to proceed for possible waiting requests to write to the block page. + It might happen only during an operation to resize the key cache. + */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); + /* type will never be FLUSH_IGNORE_CHANGED here */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + else + { + block->status&= ~BLOCK_IN_FLUSH; + link_to_file_list(pagecache, block, file, 1); + unreg_request(pagecache, block, 1); + } + } + DBUG_RETURN(last_errno); +} + + +/* + flush all key blocks for a file to disk, but don't do any mutex locks + + flush_pagecache_blocks_int() + pagecache pointer to a key cache data structure + file handler for the file to flush to + flush_type type of the flush + + NOTES + This function doesn't do any mutex locks because it needs to be called + both from flush_pagecache_blocks and flush_all_key_blocks (the later one + does the mutex lock in the resize_pagecache() function). + + RETURN + 0 ok + 1 error +*/ + +static int flush_pagecache_blocks_int(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + enum flush_type type) +{ + PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; + int last_errno= 0; + DBUG_ENTER("flush_pagecache_blocks_int"); + DBUG_PRINT("enter",("file: %d blocks_used: %d blocks_changed: %d", + file->file, pagecache->blocks_used, pagecache->blocks_changed)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, + "start of flush_pagecache_blocks", 0);); +#endif + + cache= cache_buff; + if (pagecache->disk_blocks > 0 && + (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP)) + { + /* Key cache exists and flush is not disabled */ + int error= 0; + uint count= 0; + PAGECACHE_BLOCK_LINK **pos, **end; + PAGECACHE_BLOCK_LINK *first_in_switch= NULL; + PAGECACHE_BLOCK_LINK *block, *next; +#if defined(PAGECACHE_DEBUG) + uint cnt= 0; +#endif + + if (type != FLUSH_IGNORE_CHANGED) + { + /* + Count how many key blocks we have to cache to be able + to flush all dirty pages with minimum seek moves + */ + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= block->next_changed) + { + if (block->hash_link->file.file == file->file) + { + count++; + KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used); + } + } + /* Allocate a new buffer only if its bigger than the one we have */ + if (count > FLUSH_CACHE && + !(cache= + (PAGECACHE_BLOCK_LINK**) + my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0)))) + { + cache= cache_buff; + count= FLUSH_CACHE; + } + } + + /* Retrieve the blocks and write them to a buffer to be flushed */ +restart: + end= (pos= cache)+count; + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file) + { + /* + Mark the block with BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence ot flushing dirty file pages + */ + block->status|= BLOCK_IN_FLUSH; + + if (! (block->status & BLOCK_IN_SWITCH)) + { + /* + We care only for the blocks for which flushing was not + initiated by other threads as a result of page swapping + */ + reg_requests(pagecache, block, 1); + if (type != FLUSH_IGNORE_CHANGED) + { + /* It's not a temporary file */ + if (pos == end) + { + /* + This happens only if there is not enough + memory for the big block + */ + if ((error= flush_cached_blocks(pagecache, file, cache, + end,type))) + last_errno=error; + /* + Restart the scan as some other thread might have changed + the changed blocks chain: the blocks that were in switch + state before the flush started have to be excluded + */ + goto restart; + } + *pos++= block; + } + else + { + /* It's a temporary file */ + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + } + else + { + /* Link the block into a list of blocks 'in switch' */ + unlink_changed(block); + link_changed(block, &first_in_switch); + } + } + } + if (pos != cache) + { + if ((error= flush_cached_blocks(pagecache, file, cache, pos, type))) + last_errno= error; + } + /* Wait until list of blocks in switch is empty */ + while (first_in_switch) + { +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + block= first_in_switch; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + } + /* The following happens very seldom */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { +#if defined(PAGECACHE_DEBUG) + cnt=0; +#endif + for (block= pagecache->file_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file && + (! (block->status & BLOCK_CHANGED) + || type == FLUSH_IGNORE_CHANGED)) + { + reg_requests(pagecache, block, 1); + free_block(pagecache, block); + } + } + } + } + +#ifndef DBUG_OFF + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of flush_pagecache_blocks", 0);); +#endif + if (cache != cache_buff) + my_free((gptr) cache, MYF(0)); + if (last_errno) + errno=last_errno; /* Return first error */ + DBUG_RETURN(last_errno != 0); +} + + +/* + Flush all blocks for a file to disk + + SYNOPSIS + + flush_pagecache_blocks() + pagecache pointer to a page cache data structure + file handler for the file to flush to + flush_type type of the flush + + RETURN + 0 ok + 1 error +*/ + +int flush_pagecache_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, enum flush_type type) +{ + int res; + DBUG_ENTER("flush_pagecache_blocks"); + DBUG_PRINT("enter", ("pagecache: 0x%lx", pagecache)); + + if (pagecache->disk_blocks <= 0) + DBUG_RETURN(0); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + inc_counter_for_resize_op(pagecache); + res= flush_pagecache_blocks_int(pagecache, file, type); + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Flush all blocks in the key cache to disk +*/ + +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} + + +/* + Reset the counters of a key cache. + + SYNOPSIS + reset_key_cache_counters() + name the name of a key cache + key_cache pointer to the key kache to be reset + + DESCRIPTION + This procedure is used by process_key_caches() to reset the counters of all + currently used key caches, both the default one and the named ones. + + RETURN + 0 on success (always because it can't fail) +*/ + +int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +{ + DBUG_ENTER("reset_key_cache_counters"); + if (!key_cache->inited) + { + DBUG_PRINT("info", ("Key cache %s not initialized.", name)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); + + key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + key_cache->global_cache_r_requests= 0; /* Key_read_requests */ + key_cache->global_cache_read= 0; /* Key_reads */ + key_cache->global_cache_w_requests= 0; /* Key_write_requests */ + key_cache->global_cache_write= 0; /* Key_writes */ + DBUG_RETURN(0); +} + + +#ifndef DBUG_OFF +/* + Test if disk-cache is ok +*/ +static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) +{ + /* TODO */ +} +#endif + +#if defined(PAGECACHE_TIMEOUT) + +#define KEYCACHE_DUMP_FILE "pagecache_dump.txt" +#define MAX_QUEUE_LEN 100 + + +static void pagecache_dump(PAGECACHE *pagecache) +{ + FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); + struct st_my_thread_var *last; + struct st_my_thread_var *thread; + PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_PAGE *page; + uint i; + + fprintf(pagecache_dump_file, "thread:%u\n", thread->id); + + i=0; + thread=last=waiting_for_hash_link.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n"); + if (thread) + do + { + thread= thread->next; + page= (PAGECACHE_PAGE *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u, (file,pageno)=(%u,%lu)\n", + thread->id,(uint) page->file.file,(ulong) page->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + i=0; + thread=last=waiting_for_block.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for block\n"); + if (thread) + do + { + thread=thread->next; + hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", + thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), + (uint) hash_link->file.file,(ulong) hash_link->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + for (i=0 ; i < pagecache->blocks_used ; i++) + { + int j; + block= &pagecache->block_root[i]; + hash_link= block->hash_link; + fprintf(pagecache_dump_file, + "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", + i, (int) (hash_link ? + PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) : + -1), + block->status, block->requests, block->condvar ? 1 : 0); + for (j=0 ; j < COND_SIZE; j++) + { + PAGECACHE_WQUEUE *wqueue=&block->wqueue[j]; + thread= last= wqueue->last_thread; + fprintf(pagecache_dump_file, "queue #%d\n", j); + if (thread) + { + do + { + thread=thread->next; + fprintf(pagecache_dump_file, + "thread:%u\n", thread->id); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + } + } + } + fprintf(pagecache_dump_file, "LRU chain:"); + block= pagecache= used_last; + if (block) + { + do + { + block= block->next_used; + fprintf(pagecache_dump_file, + "block:%u, ", BLOCK_NUMBER(pagecache, block)); + } + while (block != pagecache->used_last); + } + fprintf(pagecache_dump_file, "\n"); + + fclose(pagecache_dump_file); +} + +#endif /* defined(PAGECACHE_TIMEOUT) */ + +#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) + + +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + struct timeval now; /* time when we started waiting */ + struct timespec timeout; /* timeout value for the wait function */ + struct timezone tz; +#if defined(PAGECACHE_DEBUG) + int cnt=0; +#endif + + /* Get current time */ + gettimeofday(&now, &tz); + /* Prepare timeout value */ + timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT; + /* + timeval uses microseconds. + timespec uses nanoseconds. + 1 nanosecond = 1000 micro seconds + */ + timeout.tv_nsec= now.tv_usec * 1000; + KEYCACHE_THREAD_TRACE_END("started waiting"); +#if defined(PAGECACHE_DEBUG) + cnt++; + if (cnt % 100 == 0) + fprintf(pagecache_debug_log, "waiting...\n"); + fflush(pagecache_debug_log); +#endif + rc= pthread_cond_timedwait(cond, mutex, &timeout); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + if (rc == ETIMEDOUT || rc == ETIME) + { +#if defined(PAGECACHE_DEBUG) + fprintf(pagecache_debug_log,"aborted by pagecache timeout\n"); + fclose(pagecache_debug_log); + abort(); +#endif + pagecache_dump(); + } + +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); +#else + assert(rc != ETIMEDOUT); +#endif + return rc; +} +#else +#if defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + KEYCACHE_THREAD_TRACE_END("started waiting"); + rc= pthread_cond_wait(cond, mutex); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + return rc; +} +#endif +#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int rc; + rc= pthread_mutex_lock(mutex); + KEYCACHE_THREAD_TRACE_BEGIN(""); + return rc; +} + + +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + KEYCACHE_THREAD_TRACE_END(""); + pthread_mutex_unlock(mutex); + return; +} + + +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) +{ + int rc; + KEYCACHE_THREAD_TRACE("signal"); + rc= pthread_cond_signal(cond); + return rc; +} + + +#if defined(PAGECACHE_DEBUG_LOG) + + +static void pagecache_debug_print(const char * fmt, ...) +{ + va_list args; + va_start(args,fmt); + if (pagecache_debug_log) + { + VOID(vfprintf(pagecache_debug_log, fmt, args)); + VOID(fputc('\n',pagecache_debug_log)); + } + va_end(args); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) + + +void pagecache_debug_log_close(void) +{ + if (pagecache_debug_log) + fclose(pagecache_debug_log); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#endif /* defined(PAGECACHE_DEBUG) */ diff --git a/mysys/test_file.c b/mysys/test_file.c new file mode 100644 index 00000000000..7ffca48023d --- /dev/null +++ b/mysys/test_file.c @@ -0,0 +1,70 @@ +#include "mysys_priv.h" +#include "my_dir.h" +#include +#include +#include +#include "test_file.h" + + +/* + Check that file contance correspond to descriptor + + SYNOPSIS + test_file() + file File to test + file_name Path (and name) of file which is tested + size size of file + buff_size size of buffer which is enought to check the file + desc file descriptor to check with + + RETURN + 1 file if OK + 0 error +*/ + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc) +{ + MY_STAT stat_buff, *stat; + unsigned char *buffr= malloc(buff_size); + off_t pos= 0; + size_t byte; + int step= 0; + + if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL) + { + diag("Can't stat() %s (errno: %d)\n", file_name, errno); + return 0; + } + if (stat->st_size != size) + { + diag("file %s size is %lu (should be %lu)\n", + file_name, (ulong) stat->st_size, (ulong) size); + return 0; + } + /* check content */ + my_seek(file.file, 0, SEEK_SET, MYF(0)); + while (desc[step].length != 0) + { + if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) != + desc[step].length) + { + diag("Can't read %u bytes from %s (errno: %d)\n", + (uint)desc[step].length, file_name, errno); + return 0; + } + for (byte= 0; byte < desc[step].length; byte++) + { + if (buffr[byte] != desc[step].content) + { + diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n", + file_name, (uint) buffr[byte], (ulong) (pos + byte), + desc[step].content); + return 0; + } + } + pos+= desc[step].length; + step++; + } + return 1; +} diff --git a/mysys/test_file.h b/mysys/test_file.h new file mode 100644 index 00000000000..ea787c123ed --- /dev/null +++ b/mysys/test_file.h @@ -0,0 +1,14 @@ + +#include + +/* + File content descriptor +*/ +struct file_desc +{ + unsigned int length; + unsigned char content; +}; + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc); diff --git a/mysys/test_pagecache_consist.c b/mysys/test_pagecache_consist.c new file mode 100755 index 00000000000..1cc54af2460 --- /dev/null +++ b/mysys/test_pagecache_consist.c @@ -0,0 +1,447 @@ +#include "mysys_priv.h" +#include "../include/my_pthread.h" +#include "../include/pagecache.h" +#include +#include "my_dir.h" +#include +#include +#include +#include +#include "../unittest/mytap/tap.h" +#include + +/*#define PAGE_SIZE 65536*/ +#define PCACHE_SIZE (PAGE_SIZE*1024*20) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +#ifdef TEST_HIGH_CONCURENCY +static uint number_of_readers= 10; +static uint number_of_writers= 20; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_HIGH_CONCURENCY*/ +#ifdef TEST_READERS +static uint number_of_readers= 10; +static uint number_of_writers= 1; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_READERS*/ +#ifdef TEST_WRITERS +static uint number_of_readers= 0; +static uint number_of_writers= 10; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_WRITERS*/ +static uint number_of_readers= 10; +static uint number_of_writers= 10; +static uint number_of_tests= 50000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20000; +static uint flush_divider= 1000; +#endif /*TEST_WRITERS*/ +#endif /*TEST_READERS*/ +#endif /*TEST_HIGH_CONCURENCY*/ + + +/* check page consistemcy */ +uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, + int tag) +{ + uint end= sizeof(uint); + uint num= *((uint *)buff); + uint i; + DBUG_ENTER("check_page"); + + for (i= 0; i < num; i++) + { + uint len= *((uint *)(buff + end)); + uint j; + end+= sizeof(uint)+ sizeof(uint); + if (len + end > PAGE_SIZE) + { + diag("incorrect field header #%u by offset %lu\n", i, offset + end + j); + goto err; + } + for(j= 0; j < len; j++) + { + if (buff[end + j] != (uchar)((i+1) % 256)) + { + diag("incorrect %lu byte\n", offset + end + j); + goto err; + } + } + end+= len; + } + for(i= end; i < PAGE_SIZE; i++) + { + if (buff[i] != 0) + { + int h; + DBUG_PRINT("err", + ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag)); + diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag); + h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); + my_pwrite(h, buff, PAGE_SIZE, 0, MYF(0)); + my_close(h, MYF(0)); + goto err; + } + } + DBUG_RETURN(end); +err: + DBUG_PRINT("err", ("try to flush")); + if (page_locked) + { + pagecache_delete_page(&pagecache, &file1, page_no, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + } + else + { + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + } + exit(1); +} + +void put_rec(uchar *buff, uint end, uint len, uint tag) +{ + uint i; + uint num= *((uint *)buff); + if (!len) + len= 1; + if (end + sizeof(uint)*2 + len > PAGE_SIZE) + return; + *((uint *)(buff + end))= len; + end+= sizeof(uint); + *((uint *)(buff + end))= tag; + end+= sizeof(uint); + num++; + *((uint *)buff)= num; + *((uint*)(buff + end))= len; + for (i= end; i < (len + end); i++) + { + buff[i]= (uchar) num % 256; + } +} + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + + +void reader(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint page= rand()/(RAND_MAX/number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + check_page(buffr, page * PAGE_SIZE, 0, page, -num); + if (i % 500 == 0) + printf("reader%d: %d\n", num, i); + + } + printf("reader%d: done\n", num); + free(buffr); +} + + +void writer(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint end; + uint page= rand()/(RAND_MAX/number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + end= check_page(buffr, page * PAGE_SIZE, 1, page, num); + put_rec(buffr, end, rand()/(RAND_MAX/record_length_limit), num); + pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + if (i % flush_divider == 0) + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + if (i % 500 == 0) + printf("writer%d: %d\n", num, i); + } + printf("writer%d: done\n", num); + free(buffr); +} + + +static void *test_thread_reader(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_reader"); + DBUG_PRINT("enter", ("param: %d", param)); + + reader(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +static void *test_thread_writer(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_writer"); + DBUG_PRINT("enter", ("param: %d", param)); + + writer(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) + { + fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", + error,errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE, 0)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + { + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + memset(buffr, '\0', PAGE_SIZE); + for (i= 0; i < number_of_pages; i++) + { + pagecache_write(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + free(buffr); + } + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + while (number_of_readers != 0 || number_of_writers != 0) + { + if (number_of_readers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_readers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_reader, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_readers--; + } + if (number_of_writers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_writers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + } + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_thread_global_end(); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(0); +} diff --git a/mysys/test_pagecache_single.c b/mysys/test_pagecache_single.c new file mode 100644 index 00000000000..9df7844cfa5 --- /dev/null +++ b/mysys/test_pagecache_single.c @@ -0,0 +1,589 @@ +#include "mysys_priv.h" +#include "../include/my_pthread.h" +#include "../include/pagecache.h" +#include "my_dir.h" +#include +#include +#include +#include +#include +#include "../unittest/mytap/tap.h" +#include "test_file.h" + +/* #define PAGE_SIZE 1024 */ +#define PCACHE_SIZE (PAGE_SIZE*1024*10) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +/* + File contance descriptors +*/ +static struct file_desc simple_read_write_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_read_change_write_read_test_file[]= +{ + {PAGE_SIZE/2, '\65'}, + {PAGE_SIZE/2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file1[]= +{ + {PAGE_SIZE*2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file2[]= +{ + {PAGE_SIZE/2, '\1'}, + {PAGE_SIZE/2, (unsigned char)129}, + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_forget_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_flush_test_file[]= +{ + {PAGE_SIZE, '\2'}, + { 0, 0} +}; + + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + +/* + Write then read page, check file on disk +*/ + +int simple_read_write_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_write_test"); + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_write_test_file))), + "Simple write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, then read (and lock), change (write new value and unlock), + then check the page in the cache and on the disk +*/ +int simple_read_change_write_read_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_change_write_read_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + memset(buffw, '\65', PAGE_SIZE/2); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple read-change-write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_change_write_read_test_file))), + "Simple read-change-write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, read page 0 (and pin) then write page 1 and page 0. + Flush the file (shold flush only page 1 and return 1 (page 0 is + still pinned). + Check file on the disk. + Unpin and flush. + Check file on the disk. +*/ +int simple_pin_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_pin_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* test */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("error in flush_pagecache_blocks\n"); + exit(1); + } + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + memset(buffw + PAGE_SIZE/2, ((unsigned char) 129), PAGE_SIZE/2); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* + We have to get error because one page of the file is pinned, + other page should be flushed + */ + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Did not get error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2, + simple_pin_test_file1))), + "Simple pin page file with pin"); + pagecache_unlock_page(&pagecache, + &file1, + 0, + PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_UNPIN, + 0, 0); + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE, + simple_pin_test_file2))), + "Simple pin page result file"); + if (res) + reset_file(file1, file1_name); +err: + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page, write new value, then delete page from cache without flush, + on the disk should be page with old content written during preparation +*/ + +int simple_delete_forget_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_forget_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + memset(buffw, '\2', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_WRITE, 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_forget_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page with locking, write new content to the page, + delete page with flush and on existing lock, + check that page on disk contain new value. +*/ + +int simple_delete_flush_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_flush_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + memset(buffw, '\2', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_flush_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + write then read file bigger then cache +*/ + +int simple_big_test() +{ + unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); + unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); + struct file_desc *desc= + (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2)) * + sizeof(struct file_desc)); + int res, i; + DBUG_ENTER("simple_big_test"); + /* prepare the file twice larger then cache */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + memset(buffw, (unsigned char) (i & 0xff), PAGE_SIZE); + desc[i].length= PAGE_SIZE; + desc[i].content= (i & 0xff); + pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + ok(1, "Simple big file write"); + /* check written pages sequentally read */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + int j; + pagecache_read(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (i & 0xff)) + { + diag("simple_big_test seq: page %u byte %u mismatch\n", i, j); + return 0; + } + } + } + ok(1, "simple big file sequentally read"); + /* chack random reads */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++) + { + int j, page; + page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2)); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (page & 0xff)) + { + diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j); + return 0; + } + } + } + ok(1, "simple big file random read"); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + + ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE, + desc))), + "Simple big file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} +/* + Thread function +*/ + +static void *test_thread(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_thread"); + + DBUG_PRINT("enter", ("param: %d", param)); + + if (!simple_read_write_test() || + !simple_read_change_write_read_test() || + !simple_pin_test() || + !simple_delete_forget_test() || + !simple_delete_flush_test() || + !simple_big_test()) + exit(1); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) + { + fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", + error,errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE, 0)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + + if ((error=pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + param=(int*) malloc(sizeof(int)); + *param= 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_thread_global_end(); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(0); +} From 30e5a9bd1b853c60e067fe71d61c5f55bb448d85 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 24 Jul 2006 21:50:18 +0200 Subject: [PATCH 11/95] Post-vacation-musing fixes to WL#3071 "Maria checkpoint": changes to how synchronous checkpoint requests are executed. changes to how the background LRD flushing thread refrains from using all resources. See more comments for each file. storage/maria/checkpoint.c: I was not happy that checkpoint requests which want to know the success/error of their executed request, get inaccurate information in case of error (no error string etc). Instead of implementing a more complete communication protocol between requestor and executor, I make the requestor do the execution itself. I call this a synchronous checkpoint. For asynchronous checkpoints (requestor does not want to know success/error, does not want to wait for completion), no change, checkpoint is executed by the background thread. Comments, constants, mutex usage fixes. storage/maria/checkpoint.h: new prototype of "API" (the calls exposed by the checkpoint module) storage/maria/least_recently_dirtied.c: A better solution than sleeping one second after flushing a piece of the LRD: instead we pthread_yield(). Hopefully this will slow down the background thread (avoiding it using all the disk's bandwidth) if there are other threads competing, and will not slow it down if this thread is alone (where we do want it to run fast and not do useless sleeps). This thread will probe for asynchronous checkpoint requests every few seconds. --- storage/maria/checkpoint.c | 305 ++++++++++++------------- storage/maria/checkpoint.h | 10 +- storage/maria/least_recently_dirtied.c | 39 +++- 3 files changed, 172 insertions(+), 182 deletions(-) diff --git a/storage/maria/checkpoint.c b/storage/maria/checkpoint.c index af37377455e..ed18e95c360 100644 --- a/storage/maria/checkpoint.c +++ b/storage/maria/checkpoint.c @@ -6,12 +6,28 @@ /* Here is the implementation of this module */ +/* + Summary: + - there are asynchronous checkpoints (a writer to the log notices that it's + been a long time since we last checkpoint-ed, so posts a request for a + background thread to do a checkpoint; does not care about the success of the + checkpoint). Then the checkpoint is done by the checkpoint thread, at an + unspecified moment ("later") (==soon, of course). + - there are synchronous checkpoints: a thread requests a checkpoint to + happen now and wants to know when it finishes and if it succeeded; then the + checkpoint is done by that same thread. +*/ + #include "page_cache.h" #include "least_recently_dirtied.h" #include "transaction.h" #include "share.h" #include "log.h" +/* could also be called LSN_ERROR */ +#define LSN_IMPOSSIBLE ((LSN)0) +#define LSN_MAX ((LSN)ULONGLONG_MAX) + /* this transaction is used for any system work (purge, checkpoint writing etc), that is, background threads. It will not be declared/initialized here @@ -19,43 +35,112 @@ */ st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,...}; +/* those three are protected by the log's mutex */ /* The maximum rec_lsn in the LRD when last checkpoint was run, serves for the MEDIUM checkpoint. */ LSN max_rec_lsn_at_last_checkpoint= 0; +CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE; +CHECKPOINT_LEVEL synchronous_checkpoint_in_progress= NONE; -/* Picks a checkpoint request and executes it */ -my_bool checkpoint() +/* + Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA + ENGINE DO CHECKPOINT"), and probably by maria_panic(). +*/ +my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) +{ + DBUG_ENTER("execute_synchronous_checkpoint"); + DBUG_ASSERT(level > NONE); + + lock(log_mutex); + while ((synchronous_checkpoint_in_progress != NONE) || + (next_asynchronous_checkpoint_to_do != NONE)) + wait_on_checkpoint_done_cond(); + + synchronous_checkpoint_in_progress= level; + execute_checkpoint(level); + safemutex_assert_owner(log_mutex); + synchronous_checkpoint_in_progress= NONE; + unlock(log_mutex); + broadcast(checkpoint_done_cond); +} + +/* Picks a checkpoint request, if there is one, and executes it */ +my_bool execute_asynchronous_checkpoint_if_any() { CHECKPOINT_LEVEL level; - DBUG_ENTER("checkpoint"); + DBUG_ENTER("execute_asynchronous_checkpoint"); - level= checkpoint_running= checkpoint_request; - unlock(log_mutex); - - DBUG_ASSERT(level != NONE); - - switch (level) + lock(log_mutex); + if (likely(next_asynchronous_checkpoint_to_do == NONE)) { - case FULL: - /* flush all pages up to the current end of the LRD */ - flush_all_LRD_to_lsn(MAX_LSN); /* MAX_LSN==ULONGLONG_MAX */ - /* this will go full speed (normal scheduling, no sleep) */ - break; - case MEDIUM: - /* - flush all pages which were already dirty at last checkpoint: - ensures that recovery will never start from before the next-to-last - checkpoint (two-checkpoint rule). - It is max, not min as the WL says (TODO update WL). - */ - flush_all_LRD_to_lsn(max_rec_lsn_at_last_checkpoint); - /* this will go full speed (normal scheduling, no sleep) */ - break; + unlock(log_mutex); + DBUG_RETURN(FALSE); } - - error= checkpoint_indirect(); + + while (synchronous_checkpoint_in_progress) + wait_on_checkpoint_done_cond(); + +do_checkpoint: + level= next_asynchronous_checkpoint_to_do; + DBUG_ASSERT(level > NONE); + execute_checkpoint(level); + safemutex_assert_owner(log_mutex); + if (next_asynchronous_checkpoint_to_do > level) + goto do_checkpoint; /* one more request was posted */ + else + { + DBUG_ASSERT(next_asynchronous_checkpoint_to_do == level); + next_asynchronous_checkpoint_to_do= NONE; /* all work done */ + } + unlock(log_mutex); + broadcast(checkpoint_done_cond); +} + + +/* + Does the actual checkpointing. Called by + execute_synchronous_checkpoint() and + execute_asynchronous_checkpoint_if_any(). +*/ +my_bool execute_checkpoint(CHECKPOINT_LEVEL level) +{ + LSN candidate_max_rec_lsn_at_last_checkpoint; + /* to avoid { lock + no-op + unlock } in the common (==indirect) case */ + my_bool need_log_mutex; + + DBUG_ENTER("execute_checkpoint"); + + safemutex_assert_owner(log_mutex); + copy_of_max_rec_lsn_at_last_checkpoint= max_rec_lsn_at_last_checkpoint; + + if (unlikely(need_log_mutex= (level > INDIRECT))) + { + /* much I/O work to do, release log mutex */ + unlock(log_mutex); + + switch (level) + { + case FULL: + /* flush all pages up to the current end of the LRD */ + flush_all_LRD_to_lsn(LSN_MAX); + /* this will go full speed (normal scheduling, no sleep) */ + break; + case MEDIUM: + /* + flush all pages which were already dirty at last checkpoint: + ensures that recovery will never start from before the next-to-last + checkpoint (two-checkpoint rule). + It is max, not min as the WL says (TODO update WL). + */ + flush_all_LRD_to_lsn(copy_of_max_rec_lsn_at_last_checkpoint); + /* this will go full speed (normal scheduling, no sleep) */ + break; + } + } + + candidate_max_rec_lsn_at_last_checkpoint= checkpoint_indirect(need_log_mutex); lock(log_mutex); /* @@ -66,13 +151,22 @@ my_bool checkpoint() file in the hook but that would be an I/O under the log's mutex, bad. - it would not be nice organisation of code (I tried it :). */ - mark_checkpoint_done(error); - unlock(log_mutex); - DBUG_RETURN(error); + if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE) + { + /* checkpoint succeeded */ + maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; + written_since_last_checkpoint= (my_off_t)0; + DBUG_RETURN(FALSE); + } + /* + keep mutex locked because callers will want to clear mutex-protected + status variables + */ + DBUG_RETURN(TRUE); } -my_bool checkpoint_indirect() +LSN checkpoint_indirect(my_bool need_log_mutex) { DBUG_ENTER("checkpoint_indirect"); @@ -90,7 +184,8 @@ my_bool checkpoint_indirect() DBUG_ASSERT(sizeof(byte *) <= 8); DBUG_ASSERT(sizeof(LSN) <= 8); - lock(log_mutex); /* will probably be in log_read_end_lsn() already */ + if (need_log_mutex) + lock(log_mutex); /* maybe this will clash with log_read_end_lsn() */ checkpoint_start_lsn= log_read_end_lsn(); unlock(log_mutex); @@ -196,15 +291,13 @@ my_bool checkpoint_indirect() checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, &system_trans, string_array); - if (0 == checkpoint_lsn) /* maybe 0 is impossible LSN to indicate error ? */ + if (LSN_IMPOSSIBLE == checkpoint_lsn) goto err; if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) goto err; - maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; - - DBUG_RETURN(0); + DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint); err: @@ -213,7 +306,7 @@ err: my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(1); + DBUG_RETURN(LSN_IMPOSSIBLE); } @@ -235,7 +328,7 @@ log_write_record(...) ask one system thread (the "LRD background flusher and checkpointer thread" WL#3261) to do a checkpoint */ - request_checkpoint(INDIRECT, 0 /*wait_for_completion*/); + request_asynchronous_checkpoint(INDIRECT); } ...; unlock(log_mutex); @@ -243,152 +336,38 @@ log_write_record(...) } /* - Call this when you want to request a checkpoint. - In real life it will be called by log_write_record() and by client thread + Requests a checkpoint from the background thread, *asynchronously* + (requestor does not wait for completion, and does not even later check the + result). + In real life it will be called by log_write_record(). which explicitely wants to do checkpoint (ALTER ENGINE CHECKPOINT checkpoint_level). */ -int request_checkpoint(CHECKPOINT_LEVEL level, my_bool wait_for_completion) +void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); { - int error= 0; - /* - If caller wants to wait for completion we'll have to release the log mutex - to wait on condition, if caller had log mutex he may not be happy that we - release it, so we check that caller didn't have log mutex. - */ - if (wait_for_completion) - { - lock(log_mutex); - } - else - safemutex_assert_owner(log_mutex); + safemutex_assert_owner(log_mutex); - DBUG_ASSERT(checkpoint_request >= checkpoint_running); DBUG_ASSERT(level > NONE); if (checkpoint_request < level) { /* no equal or stronger running or to run, we post request */ /* note that thousands of requests for checkpoints are going to come all - at the same time (when the log bound is passed), so it may not be a good - idea for each of them to broadcast a cond. We just don't broacast a - cond, the checkpoint thread will wake up in max one second. + at the same time (when the log bound + MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS is passed), so it may not be a + good idea for each of them to broadcast a cond to wake up the background + checkpoint thread. We just don't broacast a cond, the checkpoint thread + will notice our request in max a few seconds. */ checkpoint_request= level; /* post request */ } - - if (wait_for_completion) - { - uint checkpoints_done_copy= checkpoints_done; - uint checkpoint_errors_copy= checkpoint_errors; - /* - note that the "==done" works when the uint counter wraps too, so counter - can even be smaller than uint if we wanted (however it should be big - enough so that max_the_int_type checkpoints cannot happen between two - wakeups of our thread below). uint sounds fine. - Wait for our checkpoint to be done: - */ - - if (checkpoint_running != NONE) /* not ours, let it pass */ - { - while (1) - { - if (checkpoints_done != checkpoints_done_copy) - { - if (checkpoints_done == (checkpoints_done_copy+1)) - { - /* not our checkpoint, forget about it */ - checkpoints_done_copy= checkpoints_done; - } - break; /* maybe even ours has been done at this stage! */ - } - cond_wait(checkpoint_done_cond, log_mutex); - } - } - - /* now we come to waiting for our checkpoint */ - while (1) - { - if (checkpoints_done != checkpoints_done_copy) - { - /* our checkpoint has been done */ - break; - } - if (checkpoint_errors != checkpoint_errors_copy) - { - /* - the one which was running a few milliseconds ago (if there was one), - and/or ours, had an error, just assume it was ours. So there - is a possibility that we return error though we succeeded, in which - case user will have to retry; but two simultanate checkpoints have - high changes to fail together (as the error probably comes from - malloc or disk write problem), so chance of false alarm is low. - Reporting the error only to the one which caused the error would - require having a (not fixed size) list of all requests, not worth it. - */ - error= 1; - break; - } - cond_wait(checkpoint_done_cond, log_mutex); - } - unlock(log_mutex); - } /* ... if (wait_for_completion) */ /* - If wait_for_completion was false, and there was an error, only an error + If there was an error, only an error message to the error log will say it; normal, for a checkpoint triggered by a log write, we probably don't want the client's log write to throw an error, as the log write succeeded and a checkpoint failure is not critical: the failure in this case is more for the DBA to know than for the end user. */ - return error; } - -void mark_checkpoint_done(int error) -{ - safemutex_assert_owner(log_mutex); - if (error) - checkpoint_errors++; - /* a checkpoint is said done even if it had an error */ - checkpoints_done++; - if (checkpoint_request == checkpoint_running) - { - /* - No new request has been posted, so we satisfied all requests, forget - about them. - */ - checkpoint_request= NONE; - } - checkpoint_running= NONE; - written_since_last_checkpoint= 0; - broadcast(checkpoint_done_cond); -} - -/* - Alternative (not to be done, too disturbing): - do the autocheckpoint in the thread which passed the bound first (and do the - checkpoint in the client thread which requested it). - It will give a delay to that client thread which passed the bound (time to - fsync() for example 1000 files is 16 s on my laptop). Here is code for - explicit and implicit checkpoints, where client thread does the job: -*/ -#if 0 -{ - lock(log_mutex); /* explicit takes it here, implicit already has it */ - while (checkpoint_running != NONE) - { - if (checkpoint_running >= my_level) /* always true for auto checkpoints */ - goto end; /* we skip checkpoint */ - /* a less strong is running, I'll go next */ - wait_on_checkpoint_done_cond(); - } - checkpoint_running= my_level; - checkpoint(my_level); // can gather checkpoint_start_lsn before unlock - lock(log_mutex); - checkpoint_running= NONE; - written_since_last_checkpoint= 0; -end: - unlock(log_mutex); -} -#endif diff --git a/storage/maria/checkpoint.h b/storage/maria/checkpoint.h index ce8066de93d..a9de18c695f 100644 --- a/storage/maria/checkpoint.h +++ b/storage/maria/checkpoint.h @@ -13,11 +13,7 @@ typedef enum enum_checkpoint_level { FULL /* also flush all dirty pages */ } CHECKPOINT_LEVEL; -/* - Call this when you want to request a checkpoint. - In real life it will be called by log_write_record() and by client thread - which explicitely wants to do checkpoint (ALTER ENGINE CHECKPOINT - checkpoint_level). -*/ -int request_checkpoint(CHECKPOINT_LEVEL level, my_bool wait_for_completion); +void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); +my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level); +my_bool execute_asynchronous_checkpoint_if_any(); /* that's all that's needed in the interface */ diff --git a/storage/maria/least_recently_dirtied.c b/storage/maria/least_recently_dirtied.c index e251e3c582c..a99f0c372d5 100644 --- a/storage/maria/least_recently_dirtied.c +++ b/storage/maria/least_recently_dirtied.c @@ -48,6 +48,15 @@ Key cache has groupping already somehow Monty said (investigate that). */ #define FLUSH_GROUP_SIZE 512 /* 8 MB */ +/* + We don't want to probe for checkpoint requests all the time (it takes + the log mutex). + If FLUSH_GROUP_SIZE is 8MB, assuming a local disk which can write 30MB/s + (1.8GB/min), probing every 16th call to flush_one_group_from_LRD() is every + 16*8=128MB which is every 128/30=4.2second. + Using a power of 2 gives a fast modulo operation. +*/ +#define CHECKPOINT_PROBING_PERIOD_LOG2 4 /* This thread does background flush of pieces of the LRD, and all checkpoints. @@ -56,19 +65,19 @@ pthread_handler_decl background_flush_and_checkpoint_thread() { char *flush_group_buffer= my_malloc(PAGE_SIZE*FLUSH_GROUP_SIZE); + uint flush_calls= 0; while (this_thread_not_killed) { - lock(log_mutex); - if (checkpoint_request) - checkpoint(); /* will unlock mutex */ - else - { - unlock(log_mutex); - lock(global_LRD_mutex); - flush_one_group_from_LRD(); - safemutex_assert_not_owner(global_LRD_mutex); - } - my_sleep(1000000); /* one second ? */ + if ((flush_calls++) & ((2<=max_lsn */ @@ -165,7 +180,7 @@ int flush_all_LRD_to_lsn(LSN max_lsn) max_lsn= LRD->first->prev->rec_lsn; while (LRD->first->rec_lsn < max_lsn) { - if (flush_one_group_from_LRD()) /* will unlock mutex */ + if (flush_one_group_from_LRD()) /* will unlock LRD mutex */ return 1; /* scheduler may preempt us here so that we don't take full CPU */ lock(global_LRD_mutex); From 832776fd2d4d11825eaeb945e57d6eeab816cefc Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 26 Jul 2006 09:47:44 +0200 Subject: [PATCH 12/95] two include files were missing in the source tarball, causing pushbuild build failures. include/Makefile.am: two include files missing in the source tarball --- include/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Makefile.am b/include/Makefile.am index e1ddadb933a..457e49c338b 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -24,7 +24,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \ sslopt-vars.h sslopt-case.h sql_common.h keycache.h \ mysql_time.h mysql/plugin.h $(BUILT_SOURCES) noinst_HEADERS = config-win.h config-netware.h \ - heap.h my_bitmap.h my_uctype.h \ + heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \ myisam.h myisampack.h myisammrg.h ft_global.h\ mysys_err.h my_base.h help_start.h help_end.h \ my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \ From e6efa27c0b720702f3841a4d28e795c49432bfc5 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 26 Jul 2006 10:36:34 +0200 Subject: [PATCH 13/95] Writing down MikaelR's comments made in May in Helsinki so that they are not forgotten. Minor fixes. storage/maria/checkpoint.c: minor fixes storage/maria/least_recently_dirtied.c: writing down MikaelR's comments. storage/maria/recovery.c: writing down MikaelR's comments. Some small fixes. --- storage/maria/checkpoint.c | 10 ++-- storage/maria/least_recently_dirtied.c | 9 ++++ storage/maria/recovery.c | 69 +++++++++++++++++--------- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/storage/maria/checkpoint.c b/storage/maria/checkpoint.c index ed18e95c360..151efe6ad4f 100644 --- a/storage/maria/checkpoint.c +++ b/storage/maria/checkpoint.c @@ -246,7 +246,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex) if no latch, use double variable of type ULONGLONG_CONSISTENT in st_transaction, or even no need if Intel >=486 */ - int8store(ptr, el->first_purge_lsn); + int8store(ptr, el->first_undo_lsn); ptr+= 8; /* possibly unlatch el.rwlock */ } @@ -297,16 +297,18 @@ LSN checkpoint_indirect(my_bool need_log_mutex) if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) goto err; - DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint); + goto end; err: - print_error_to_error_log(the_error_message); + candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE; + +end: my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(LSN_IMPOSSIBLE); + DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint); } diff --git a/storage/maria/least_recently_dirtied.c b/storage/maria/least_recently_dirtied.c index a99f0c372d5..bca13ca6f1f 100644 --- a/storage/maria/least_recently_dirtied.c +++ b/storage/maria/least_recently_dirtied.c @@ -19,6 +19,13 @@ #include "page_cache.h" #include "least_recently_dirtied.h" +/* + MikaelR suggested removing this global_LRD_mutex (I have a paper note of + comments), however at least for the first version we'll start with this + mutex (which will be a LOCK-based atomic_rwlock). +*/ +pthread_mutex_t global_LRD_mutex; + /* When we flush a page, we should pin page. This "pin" is to protect against that: @@ -61,6 +68,8 @@ /* This thread does background flush of pieces of the LRD, and all checkpoints. Just launch it when engine starts. + MikaelR questioned why the same thread does two different jobs, the risk + could be that while a checkpoint happens no LRD flushing happens. */ pthread_handler_decl background_flush_and_checkpoint_thread() { diff --git a/storage/maria/recovery.c b/storage/maria/recovery.c index 5af7019e5fb..589e0971686 100644 --- a/storage/maria/recovery.c +++ b/storage/maria/recovery.c @@ -16,31 +16,42 @@ typedef struct st_record_type_properties { /* used for debug error messages or "maria_read_log" command-line tool: */ char *name, my_bool record_ends_group; - int (*record_execute)(RECORD *); /* param will be record header instead later */ + /* a function to execute when we see the record during the REDO phase */ + int (*record_execute_in_redo_phase)(RECORD *); /* param will be record header instead later */ + /* a function to execute when we see the record during the UNDO phase */ + int (*record_execute_in_undo_phase)(RECORD *); /* param will be record header instead later */ } RECORD_TYPE_PROPERTIES; +int no_op(RECORD *) {return 0}; + RECORD_TYPE_PROPERTIES all_record_type_properties[]= { /* listed here in the order of the "log records type" enumeration */ - {"REDO_INSERT_HEAD", 0, redo_insert_head_execute}, + {"REDO_INSERT_HEAD", FALSE, redo_insert_head_execute_in_redo_phase, no_op}, ..., - {"UNDO_INSERT" , 1, undo_insert_execute }, - {"COMMIT", , 1, commit_execute }, + {"UNDO_INSERT" , TRUE , undo_insert_execute_in_redo_phase, undo_insert_execute_in_undo_phase}, + {"COMMIT", , TRUE , commit_execute_in_redo_phase, no_op}, ... }; -int redo_insert_head_execute(RECORD *record) +int redo_insert_head_execute_in_redo_phase(RECORD *record) { /* write the data to the proper page */ } -int undo_insert_execute(RECORD *record) +int undo_insert_execute_in_redo_phase(RECORD *record) { trans_table[short_trans_id].undo_lsn= record.lsn; - /* restore the old version of the row */ + /* don't restore the old version of the row */ } -int commit_execute(RECORD *record) +int undo_insert_execute_in_undo_phase(RECORD *record) +{ + /* restore the old version of the row */ + trans_table[short_trans_id].undo_lsn= record.prev_undo_lsn; +} + +int commit_execute_in_redo_phase(RECORD *record) { trans_table[short_trans_id].state= COMMITTED; /* @@ -52,8 +63,8 @@ int commit_execute(RECORD *record) #define record_ends_group(R) \ all_record_type_properties[(R)->type].record_ends_group) -#define execute_log_record(R) \ - all_record_type_properties[(R).type].record_execute(R) +#define execute_log_record_in_redo_phase(R) \ + all_record_type_properties[(R).type].record_execute_in_redo_phase(R) int recovery() @@ -77,7 +88,10 @@ int recovery() phase): */ - record= log_read_record(min(rec_lsn, ...)); + /**** REDO PHASE *****/ + + record= log_read_record(min(rec_lsn, ...)); /* later, read only header */ + /* if log handler knows the end LSN of the log, we could print here how many MB of log we have to read (to give an idea of the time), and print @@ -94,15 +108,11 @@ int recovery() */ if (record_ends_group(record) { - /* - such end events can always be executed immediately (they don't touch - the disk). - */ - execute_log_record(record); if (trans_table[record.short_trans_id].group_start_lsn != 0) { /* - There is a complete group for this transaction. + There is a complete group for this transaction, containing more than + this event. We're going to read recently read log records: for this log_read_record() to be efficient (not touch the disk), log handler could cache recently read pages @@ -110,17 +120,19 @@ int recovery() log handler page cache). Without it only OS file cache will help. */ - record2= log_read_record(trans_table[record.short_trans_id].group_start_lsn); - while (record2.lsn < record.lsn) + record2= + log_read_record(trans_table[record.short_trans_id].group_start_lsn); + + do { if (record2.short_trans_id == record.short_trans_id) - execute_log_record(record2); /* it's in our group */ + execute_log_record_in_redo_phase(record2); /* it's in our group */ record2= log_read_next_record(); } + while (record2.lsn < record.lsn); trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */ - /* we're now at the UNDO, re-read it to advance log pointer */ - record2= log_read_next_record(); /* and throw it away */ } + execute_log_record_in_redo_phase(record); } else /* record does not end group */ { @@ -161,7 +173,14 @@ int recovery() the log, and so the delete/update handler may do changes which conflict with these REDOs. Even if done here, better to not wake it up now as we're going to free the - page cache: + page cache. + + MikaelR suggests: support checkpoints during REDO phase too: do checkpoint + after a certain amount of log records have been executed. This helps + against repeated crashes. Those checkpoints could not be user-requested + (as engine is not communicating during the REDO phase), so they would be + automatic: this changes the original assumption that we don't write to the + log while in the REDO phase, but why not. How often should we checkpoint? */ /* @@ -178,6 +197,8 @@ int recovery() do this: */ + /**** UNDO PHASE *****/ + print_information_to_error_log(nb of trans to roll back, nb of prepared trans); /* @@ -217,7 +238,7 @@ pthread_handler_decl rollback_background_thread() { /* this is the normal runtime-rollback code: */ record= log_read_record(trans->undo_lsn); - execute_log_record(record); + execute_log_record_in_undo_phase(record); trans->undo_lsn= record.prev_undo_lsn; } /* remove trans from list */ From 99c431db92f8904bf50f6944e1488a0172c4ebd8 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Aug 2006 16:36:54 +0200 Subject: [PATCH 14/95] Completion of merge of mysql-5.1 into mysql-maria. Manually imported changes done to MyISAM (include/myisam.h, storage/myisam/*, sql/ha_myisam.*, mysql-test/t/myisam.test, mysql-test/t/ps_2myisam.test) the last months into Maria (tedious, should do it more frequently in the future), including those not done at the previous 5.1->Maria merge (please in the future don't forget to apply MyISAM changes to Maria when you merge 5.1 into Maria). Note: I didn't try to import anything which could be MyISAM-related in other tests of mysql-test (I didn't want to dig in all csets), but as QA is working to make most tests re-usable for other engines (Falcon), it is likely that we'll benefit from this and just have to set engine=Maria somewhere to run those tests on Maria. func_group and partition tests fail but they already do in main 5.1 on my machine. No Valgrind error in t/*maria*.test. Monty: please see the commit comment of maria.result and check. BitKeeper/deleted/.del-ha_maria.m4: Delete: config/ac-macros/ha_maria.m4 configure.in: fix for the new way of enabling engines include/maria.h: importing changes done to MyISAM the last months into Maria include/my_handler.h: importing changes done to MyISAM the last months into Maria include/myisam.h: importing changes done to MyISAM the last months into Maria mysql-test/r/maria.result: identical to myisam.result, except the engine name in some places AND in the line testing key_block_size=1000000000000000000: Maria gives a key block size of 8192 while MyISAM gives 4096; is it explainable by the difference between MARIA_KEY_BLOCK_LENGTH and the same constant in MyISAM? Monty? mysql-test/r/ps_maria.result: identical to ps_2myisam.result (except the engine name in some places) mysql-test/t/maria.test: instead of engine=maria everywhere, I use @@storage_engine (reduces the diff with myisam.test). importing changes done to MyISAM the last months into Maria mysys/my_handler.c: importing changes done to MyISAM the last months into Maria sql/ha_maria.cc: importing changes done to MyISAM the last months into Maria sql/ha_maria.h: importing changes done to MyISAM the last months into Maria sql/mysqld.cc: unneeded storage/maria/Makefile.am: importing changes done to MyISAM the last months into Maria storage/maria/ma_check.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_create.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_delete_table.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_dynrec.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_extra.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_boolean_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_eval.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_nlq_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_parser.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_test1.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_update.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ftdefs.h: importing changes done to MyISAM the last months into Maria storage/maria/ma_key.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_open.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_page.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rkey.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rsamepos.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rt_index.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rt_mbr.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_sort.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test1.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test2.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test3.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_update.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_write.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_chk.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_def.h: importing changes done to MyISAM the last months into Maria storage/maria/maria_ftdump.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_pack.c: importing changes done to MyISAM the last months into Maria --- config/ac-macros/ha_maria.m4 | 29 --- configure.in | 11 +- include/maria.h | 6 +- include/my_handler.h | 2 - include/myisam.h | 1 + mysql-test/r/maria.result | 294 +++++++++++++++++++++------ mysql-test/r/ps_maria.result | 4 +- mysql-test/t/maria.test | 242 ++++++++++++++++------ mysys/my_handler.c | 1 - sql/ha_maria.cc | 161 +++++++-------- sql/ha_maria.h | 8 +- sql/mysqld.cc | 1 - storage/maria/Makefile.am | 2 +- storage/maria/ma_check.c | 105 ++++++---- storage/maria/ma_create.c | 82 ++++++-- storage/maria/ma_delete_table.c | 24 ++- storage/maria/ma_dynrec.c | 8 + storage/maria/ma_extra.c | 57 +++--- storage/maria/ma_ft_boolean_search.c | 62 +++--- storage/maria/ma_ft_eval.c | 1 + storage/maria/ma_ft_nlq_search.c | 10 +- storage/maria/ma_ft_parser.c | 101 +++++---- storage/maria/ma_ft_test1.c | 1 + storage/maria/ma_ft_update.c | 57 +++--- storage/maria/ma_ftdefs.h | 22 +- storage/maria/ma_key.c | 16 +- storage/maria/ma_open.c | 12 +- storage/maria/ma_page.c | 9 +- storage/maria/ma_rkey.c | 19 +- storage/maria/ma_rsamepos.c | 3 +- storage/maria/ma_rt_index.c | 8 +- storage/maria/ma_rt_mbr.c | 6 +- storage/maria/ma_search.c | 47 ++--- storage/maria/ma_sort.c | 1 + storage/maria/ma_test1.c | 1 + storage/maria/ma_test2.c | 16 +- storage/maria/ma_test3.c | 2 + storage/maria/ma_update.c | 3 +- storage/maria/ma_write.c | 3 +- storage/maria/maria_chk.c | 10 +- storage/maria/maria_def.h | 7 +- storage/maria/maria_ftdump.c | 19 +- storage/maria/maria_pack.c | 6 +- 43 files changed, 952 insertions(+), 528 deletions(-) delete mode 100644 config/ac-macros/ha_maria.m4 diff --git a/config/ac-macros/ha_maria.m4 b/config/ac-macros/ha_maria.m4 deleted file mode 100644 index 56ec8720a09..00000000000 --- a/config/ac-macros/ha_maria.m4 +++ /dev/null @@ -1,29 +0,0 @@ -dnl --------------------------------------------------------------------------- -dnl Macro: MYSQL_CHECK_MARIA -dnl Sets HAVE_MARIA_DB if --with-maria-storage-engine is used -dnl --------------------------------------------------------------------------- -AC_DEFUN([MYSQL_CHECK_MARIA], [ - AC_ARG_WITH([maria-storage-engine], - [ - --with-maria-storage-engine - Enable the Maria Storage Engine], - [mariadb="$withval"], - [mariadb=no]) - AC_MSG_CHECKING([for Maria storage engine]) - - case "$mariadb" in - yes ) - AC_DEFINE([HAVE_MARIA_DB], [1], [Builds Maria Storage Engine]) - AC_MSG_RESULT([yes]) - [mariadb=yes] - ;; - * ) - AC_MSG_RESULT([no]) - [mariadb=no] - ;; - esac - -]) -dnl --------------------------------------------------------------------------- -dnl END OF MYSQL_CHECK_MARIA SECTION -dnl --------------------------------------------------------------------------- diff --git a/configure.in b/configure.in index 989c3d21906..c5e6575ac5f 100644 --- a/configure.in +++ b/configure.in @@ -2200,15 +2200,16 @@ MYSQL_PLUGIN_ACTIONS(ndbcluster,[MYSQL_SETUP_NDBCLUSTER]) MYSQL_STORAGE_ENGINE(partition, partition, [Partition Support], [MySQL Partitioning Support], [max,max-no-ndb]) -MYSQL_STORAGE_ENGINE(maria,maria, [Maria Storage Engine], - [Traditional transactional MySQL tables]) -MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) -MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) -MYSQL_PLUGIN_MANDATORY(maria) dnl -- ndbcluster requires partition to be enabled MYSQL_PLUGIN_DEPENDS(ndbcluster, [partition]) +MYSQL_STORAGE_ENGINE(maria,maria, [Maria Storage Engine], + [Traditional transactional MySQL tables], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) +MYSQL_PLUGIN_MANDATORY(maria) + MYSQL_CONFIGURE_PLUGINS([none]) # Only build client code? diff --git a/include/maria.h b/include/maria.h index 72fb1cf2dff..705ba7c91c7 100644 --- a/include/maria.h +++ b/include/maria.h @@ -168,7 +168,7 @@ typedef struct st_maria_keydef /* Key definition with open & info */ uint16 keylength; /* Tot length of keyparts (auto) */ uint16 minlength; /* min length of (packed) key (auto) */ uint16 maxlength; /* max length of (packed) key (auto) */ - uint16 block_size; /* block_size (auto) */ + uint16 block_size_index; /* block_size (auto) */ uint32 version; /* For concurrent read/write */ uint32 ftparser_nr; /* distinct ftparser number */ @@ -235,7 +235,6 @@ typedef struct st_maria_columndef /* column information */ } MARIA_COLUMNDEF; -extern my_string maria_log_filename; /* Name of logfile */ extern ulong maria_block_size; extern ulong maria_concurrent_insert; extern my_bool maria_flush, maria_delay_key_write, maria_single_user; @@ -281,9 +280,9 @@ extern int maria_delete_table(const char *name); extern int maria_rename(const char *from, const char *to); extern int maria_extra(struct st_maria_info *file, enum ha_extra_function function, void *extra_arg); +extern int maria_reset(struct st_maria_info *file); extern ha_rows maria_records_in_range(struct st_maria_info *info, int inx, key_range *min_key, key_range *max_key); -extern int maria_logging(int activate_log); extern int maria_is_changed(struct st_maria_info *info); extern int maria_delete_all_rows(struct st_maria_info *info); extern uint maria_get_pointer_length(ulonglong file_length, uint def); @@ -328,6 +327,7 @@ typedef struct st_maria_sort_param uchar **sort_keys; byte *rec_buff; void *wordlist, *wordptr; + MEM_ROOT wordroot; char *record; MY_TMPDIR *tmpdir; diff --git a/include/my_handler.h b/include/my_handler.h index ea027b18725..9b086036389 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -18,8 +18,6 @@ #ifndef _my_handler_h #define _my_handler_h -#include "my_base.h" -#include "m_ctype.h" #include "myisampack.h" /* diff --git a/include/myisam.h b/include/myisam.h index 07e8346f981..99c8c8d73ae 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -334,6 +334,7 @@ typedef struct st_mi_sort_param uchar **sort_keys; byte *rec_buff; void *wordlist, *wordptr; + MEM_ROOT wordroot; char *record; MY_TMPDIR *tmpdir; diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 3b360873c36..b6924e4395b 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -1,9 +1,11 @@ +set global storage_engine=maria; +set session storage_engine=maria; drop table if exists t1,t2; SET SQL_WARNINGS=1; CREATE TABLE t1 ( STRING_DATA char(255) default NULL, KEY string_data (STRING_DATA) -) ENGINE=MARIA; +); INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); @@ -14,7 +16,7 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)) engine=maria; +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); check table t1; Table Op Msg_type Msg_text test.t1 check status OK @@ -32,7 +34,7 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)) engine=maria; +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); insert into t1 (b) values (1),(2),(2),(2),(2); optimize table t1; Table Op Msg_type Msg_text @@ -49,7 +51,7 @@ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_par t1 0 PRIMARY 1 a A 5 NULL NULL BTREE t1 1 b 1 b A 1 NULL NULL BTREE drop table t1; -create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=maria; +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); explain select * from t1 order by a; id select_type table type possible_keys key key_len ref rows Extra @@ -76,7 +78,7 @@ explain select a,b,c from t1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 drop table t1; -CREATE TABLE t1 (a INT) engine=maria; +CREATE TABLE t1 (a INT); INSERT INTO t1 VALUES (1), (2), (3); LOCK TABLES t1 WRITE; INSERT INTO t1 VALUES (1), (2), (3); @@ -84,7 +86,7 @@ OPTIMIZE TABLE t1; Table Op Msg_type Msg_text test.t1 optimize status OK DROP TABLE t1; -create table t1 ( t1 char(255), key(t1(250))) engine=maria; +create table t1 ( t1 char(255), key(t1(250))); insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); @@ -245,7 +247,7 @@ int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 -int, i999 int, i1000 int, b blob) row_format=dynamic engine=maria; +int, i999 int, i1000 int, b blob) row_format=dynamic; insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -313,7 +315,7 @@ KEY `ip` (`ip`), KEY `poster_login` (`poster_login`), KEY `topic_id` (`topic_id`), FULLTEXT KEY `post_text` (`post_text`) -) ENGINE=MARIA; +); INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); REPAIR TABLE t1; Table Op Msg_type Msg_text @@ -322,15 +324,15 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)) engine=maria; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); ERROR 42000: Specified key was too long; max key length is 1000 bytes -CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)) engine=maria; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); ERROR 42000: Specified key was too long; max key length is 1000 bytes DROP TABLE t1; -CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)) engine=maria; +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); -create table t2 (a int not null, b int, c int, key(b), key(c), key(a)) engine=maria; +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); INSERT into t2 values (1,1,1), (2,2,2); optimize table t1; Table Op Msg_type Msg_text @@ -346,11 +348,11 @@ t1 1 c_2 2 a A 5 NULL NULL BTREE explain select * from t1,t2 where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where explain select * from t1,t2 force index(a) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 @@ -362,7 +364,7 @@ id select_type table type possible_keys key key_len ref rows Extra explain select * from t1,t2 force index(c) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL NULL NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 4 Using where +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where explain select * from t1 where a=0 or a=2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where @@ -376,7 +378,7 @@ explain select * from t1 use index() where c=1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where drop table t1,t2; -create table t1 (a int not null auto_increment primary key, b varchar(255)) engine=maria; +create table t1 (a int not null auto_increment primary key, b varchar(255)); insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); update t1 set b=repeat(left(b,1),200) where a=1; delete from t1 where (a & 1)= 0; @@ -398,7 +400,7 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -create table t1 ( a text not null, key a (a(20))) engine=maria; +create table t1 ( a text not null, key a (a(20))); insert into t1 values ('aaa '),('aaa'),('aa'); check table t1; Table Op Msg_type Msg_text @@ -420,7 +422,7 @@ bbb. bbb. aa. drop table t1; -create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))) engine=maria; +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); insert into t1 values('807780', '477', '165'); insert into t1 values('807780', '477', '162'); insert into t1 values('807780', '472', '162'); @@ -431,7 +433,7 @@ drop table t1; DROP TABLE IF EXISTS t1; Warnings: Note 1051 Unknown table 't1' -CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)) engine=maria; +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); INSERT t1 VALUES ("can \tcan"); INSERT t1 VALUES ("can can"); INSERT t1 VALUES ("can"); @@ -444,7 +446,7 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; -create table t1 (a blob) engine=maria; +create table t1 (a blob); insert into t1 values('a '),('a'); select concat(a,'.') from t1 where a='a'; concat(a,'.') @@ -460,7 +462,7 @@ select concat(a,'.') from t1 where a='a '; concat(a,'.') a . drop table t1; -create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))) engine=maria; +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); insert into t1 (b) values ('a'),('b'),('c'); select concat(b,'.') from t1; concat(b,'.') @@ -483,8 +485,8 @@ a concat(b,'.') 1 a. 3 c. drop table t1; -create table t1 (a int not null) engine=maria; -create table t2 (a int not null, primary key (a)) engine=maria; +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); insert into t1 values (1); insert into t2 values (1),(2); select sql_big_result distinct t1.a from t1,t2 order by t2.a; @@ -508,17 +510,17 @@ drop table t1,t2; create table t1 ( c1 varchar(32), key (c1) -) engine=maria; +); alter table t1 disable keys; insert into t1 values ('a'), ('b'); select c1 from t1 order by c1 limit 1; c1 a drop table t1; -CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)) ENGINE=MARIA; +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); Got one of the listed errors -create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=maria; -create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=maria; +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); insert t2 select * from t1; checksum table t1, t2, t3 quick; @@ -543,7 +545,7 @@ test.t3 NULL Warnings: Error 1146 Table 'test.t3' doesn't exist drop table t1,t2; -create table t1 (a int, key (a)) engine=maria; +create table t1 (a int, key (a)); show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment t1 1 a 1 a A NULL NULL NULL YES BTREE @@ -551,7 +553,7 @@ alter table t1 disable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment t1 1 a 1 a A NULL NULL NULL YES BTREE disabled -create table t2 (a int) engine=maria; +create table t2 (a int); set @@rand_seed1=31415926,@@rand_seed2=2718281828; insert t1 select * from t2; show keys from t1; @@ -569,7 +571,7 @@ show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment t1 1 a 1 a NULL 500 NULL NULL YES HASH drop table t1,t2; -create table t1 ( a tinytext, b char(1), index idx (a(1),b)) engine=maria; +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); insert into t1 values (null,''), (null,''); explain select count(*) from t1 where a is null; id select_type table type possible_keys key key_len ref rows Extra @@ -579,20 +581,20 @@ count(*) 2 drop table t1; create table t1 (c1 int, c2 varchar(4) not null default '', -key(c2(3))) default charset=utf8 engine=maria; +key(c2(3))) default charset=utf8; insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); update t1 set c2='A B' where c1=2; check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -create table t1 (c1 int) engine=maria; +create table t1 (c1 int); insert into t1 values (1),(2),(3),(4); checksum table t1; Table Checksum test.t1 149057747 delete from t1 where c1 = 1; -create table t2 engine=maria as select * from t1; +create table t2 as select * from t1; checksum table t1; Table Checksum test.t1 984116287 @@ -603,7 +605,7 @@ drop table t1, t2; show variables like 'maria_stats_method'; Variable_name Value maria_stats_method nulls_unequal -create table t1 (a int, key(a)) engine=maria; +create table t1 (a int, key(a)); insert into t1 values (0),(1),(2),(3),(4); insert into t1 select NULL from t1; analyze table t1; @@ -668,7 +670,7 @@ maria_stats_method nulls_ignored create table t1 ( a char(3), b char(4), c char(5), d char(6), key(a,b,c,d) -) engine=maria; +); insert into t1 values ('bcd','def1', NULL, 'zz'); insert into t1 values ('bcd','def2', NULL, 'zz'); insert into t1 values ('bce','def1', 'yuu', NULL); @@ -699,7 +701,7 @@ cip INT NOT NULL, time TIME NOT NULL, score INT NOT NULL DEFAULT 0, bob TINYBLOB -) engine=maria; +); insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), (6, 'c', '00:06'); @@ -729,7 +731,27 @@ select count(id1) from t1 where id2 = 10; count(id1) 5 drop table t1; -set storage_engine=MARIA; +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +MAX(a) +1 +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +DROP TABLE t1; +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +a b +xxxxxxxxx bbbbbb +xxxxxxxxx bbbbbb +DROP TABLE t1; drop table if exists t1,t2,t3; --- Testing varchar --- --- Testing varchar --- @@ -1301,11 +1323,11 @@ select * from t1 where a=20 and b is null; a b 20 NULL drop table t1; -create table t1 (v varchar(65530), key(v)) engine=maria; +create table t1 (v varchar(65530), key(v)); Warnings: Warning 1071 Specified key was too long; max key length is 1000 bytes drop table if exists t1; -create table t1 (v varchar(65536)) engine=maria; +create table t1 (v varchar(65536)); Warnings: Note 1246 Converting column 'v' from VARCHAR to TEXT show create table t1; @@ -1314,7 +1336,7 @@ t1 CREATE TABLE `t1` ( `v` mediumtext ) ENGINE=MARIA DEFAULT CHARSET=latin1 drop table t1; -create table t1 (v varchar(65530) character set utf8) engine=maria; +create table t1 (v varchar(65530) character set utf8); Warnings: Note 1246 Converting column 'v' from VARCHAR to TEXT show create table t1; @@ -1323,24 +1345,11 @@ t1 CREATE TABLE `t1` ( `v` mediumtext CHARACTER SET utf8 ) ENGINE=MARIA DEFAULT CHARSET=latin1 drop table t1; -create table t1 (v varchar(65535)) engine=maria; +create table t1 (v varchar(65535)); ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs -set storage_engine=MyISAM; -create table t1 (a int) engine=maria; -drop table if exists t1; -Warnings: -Error 2 Can't find file: 't1' (errno: 2) -create table t1 (a int) engine=maria; -drop table t1; -Got one of the listed errors -create table t1 (a int) engine=maria; -drop table t1; -Got one of the listed errors -drop table t1; -ERROR 42S02: Unknown table 't1' set @save_concurrent_insert=@@concurrent_insert; set global concurrent_insert=1; -create table t1 (a int) engine=maria; +create table t1 (a int); insert into t1 values (1),(2),(3),(4),(5); lock table t1 read local; insert into t1 values(6),(7); @@ -1367,7 +1376,7 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; -create table t1 (a int, b varchar(30) default "hello") engine=maria; +create table t1 (a int, b varchar(30) default "hello"); insert into t1 (a) values (1),(2),(3),(4),(5); lock table t1 read local; insert into t1 (a) values(6),(7); @@ -1395,7 +1404,7 @@ Table Op Msg_type Msg_text test.t1 check status OK drop table t1; set global concurrent_insert=@save_concurrent_insert; -create table t1 (a int, key(a)) engine=maria; +create table t1 (a int, key(a)); insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); analyze table t1; Table Op Msg_type Msg_text @@ -1409,9 +1418,172 @@ show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment t1 1 a 1 a A 8 NULL NULL YES BTREE drop table t1; -create table t1 (c1 int) engine=maria pack_keys=0; -create table t2 (c1 int) engine=maria pack_keys=1; -create table t3 (c1 int) engine=maria pack_keys=default; -create table t4 (c1 int) engine=maria pack_keys=2; +show create table t1; +show create table t1; +create table t1 (a int) select 42 a; +select * from t1; +a +9 +select * from t1; +a +99 +select * from t1; +a +42 +drop table t1; +End of 4.1 tests +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; +create table t4 (c1 int) pack_keys=2; ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1 drop table t1, t2, t3; +End of 5.0 tests +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=2048 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1024 +alter table t1 key_block_size=2048; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 add c int, add key (c); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096, + KEY `c` (`c`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + `d` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096, + KEY `c` (`c`) KEY_BLOCK_SIZE=2048, + KEY `d` (`d`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=16384 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=2048 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1 +End of 5.1 tests +set global storage_engine=MyISAM; diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index 00e32cf8fce..f47c72187e2 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1775,7 +1775,7 @@ NULL as const12, @arg12 as param12, show create table t5 ; Table Create Table t5 CREATE TABLE `t5` ( - `const01` bigint(1) NOT NULL DEFAULT '0', + `const01` int(1) NOT NULL DEFAULT '0', `param01` bigint(20) DEFAULT NULL, `const02` decimal(2,1) NOT NULL DEFAULT '0.0', `param02` decimal(65,30) DEFAULT NULL, @@ -1805,7 +1805,7 @@ t5 CREATE TABLE `t5` ( ) ENGINE=MyISAM DEFAULT CHARSET=latin1 select * from t5 ; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def test t5 t5 const01 const01 8 1 1 N 32769 0 63 +def test t5 t5 const01 const01 3 1 1 N 32769 0 63 def test t5 t5 param01 param01 8 20 1 Y 32768 0 63 def test t5 t5 const02 const02 246 4 3 N 1 1 63 def test t5 t5 param02 param02 246 67 32 Y 0 30 63 diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 59d36206039..1a44775277b 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -3,6 +3,10 @@ # -- source include/have_maria.inc +let $default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + # Initialise --disable_warnings drop table if exists t1,t2; @@ -16,7 +20,7 @@ SET SQL_WARNINGS=1; CREATE TABLE t1 ( STRING_DATA char(255) default NULL, KEY string_data (STRING_DATA) -) ENGINE=MARIA; +); INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); @@ -31,7 +35,7 @@ drop table t1; # Test problem with rows that are 65517-65520 bytes long # -create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)) engine=maria; +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); let $1=100; disable_query_log; @@ -57,7 +61,7 @@ drop table t1; # Test bug: Two optimize in a row reset index cardinality # -create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)) engine=maria; +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); insert into t1 (b) values (1),(2),(2),(2),(2); optimize table t1; show index from t1; @@ -69,7 +73,7 @@ drop table t1; # Test of how ORDER BY works when doing it on the whole table # -create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)) engine=maria; +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); explain select * from t1 order by a; explain select * from t1 order by b; @@ -84,7 +88,7 @@ drop table t1; # # Test of OPTIMIZE of locked and modified tables # -CREATE TABLE t1 (a INT) engine=maria; +CREATE TABLE t1 (a INT); INSERT INTO t1 VALUES (1), (2), (3); LOCK TABLES t1 WRITE; INSERT INTO t1 VALUES (1), (2), (3); @@ -96,7 +100,7 @@ DROP TABLE t1; # in ha_maria::repair, and index size is changed (decreased). # -create table t1 ( t1 char(255), key(t1(250))) engine=maria; +create table t1 ( t1 char(255), key(t1(250))); insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); @@ -258,7 +262,7 @@ int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 -int, i999 int, i1000 int, b blob) row_format=dynamic engine=maria; +int, i999 int, i1000 int, b blob) row_format=dynamic; insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -325,7 +329,7 @@ CREATE TABLE `t1` ( KEY `poster_login` (`poster_login`), KEY `topic_id` (`topic_id`), FULLTEXT KEY `post_text` (`post_text`) -) ENGINE=MARIA; +); INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); @@ -338,8 +342,8 @@ drop table t1; # --error 1071 -CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)) engine=maria; -CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)) engine=maria; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); --error 1071 ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); DROP TABLE t1; @@ -348,9 +352,9 @@ DROP TABLE t1; # Test of cardinality of keys with NULL # -CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)) engine=maria; +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); -create table t2 (a int not null, b int, c int, key(b), key(c), key(a)) engine=maria; +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); INSERT into t2 values (1,1,1), (2,2,2); optimize table t1; show index from t1; @@ -369,7 +373,7 @@ drop table t1,t2; # Test bug when updating a split dynamic row where keys are not changed # -create table t1 (a int not null auto_increment primary key, b varchar(255)) engine=maria; +create table t1 (a int not null auto_increment primary key, b varchar(255)); insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); update t1 set b=repeat(left(b,1),200) where a=1; delete from t1 where (a & 1)= 0; @@ -402,7 +406,7 @@ drop table t1; # # two bugs in maria-space-stripping feature # -create table t1 ( a text not null, key a (a(20))) engine=maria; +create table t1 ( a text not null, key a (a(20))); insert into t1 values ('aaa '),('aaa'),('aa'); check table t1; repair table t1; @@ -416,7 +420,7 @@ drop table t1; # Third bug in the same code (BUG#2295) # -create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))) engine=maria; +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); insert into t1 values('807780', '477', '165'); insert into t1 values('807780', '477', '162'); insert into t1 values('807780', '472', '162'); @@ -427,7 +431,7 @@ drop table t1; # space-stripping in _mi_prefix_search: BUG#5284 # DROP TABLE IF EXISTS t1; -CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)) engine=maria; +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); INSERT t1 VALUES ("can \tcan"); INSERT t1 VALUES ("can can"); INSERT t1 VALUES ("can"); @@ -438,7 +442,7 @@ DROP TABLE t1; # # Verify blob handling # -create table t1 (a blob) engine=maria; +create table t1 (a blob); insert into t1 values('a '),('a'); select concat(a,'.') from t1 where a='a'; select concat(a,'.') from t1 where a='a '; @@ -450,7 +454,7 @@ drop table t1; # # Test text and unique # -create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))) engine=maria; +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); insert into t1 (b) values ('a'),('b'),('c'); select concat(b,'.') from t1; update t1 set b='b ' where a=2; @@ -466,8 +470,8 @@ drop table t1; # # Test keys with 0 segments. (Bug #3203) # -create table t1 (a int not null) engine=maria; -create table t2 (a int not null, primary key (a)) engine=maria; +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); insert into t1 values (1); insert into t2 values (1),(2); select sql_big_result distinct t1.a from t1,t2 order by t2.a; @@ -483,7 +487,7 @@ drop table t1,t2; create table t1 ( c1 varchar(32), key (c1) -) engine=maria; +); alter table t1 disable keys; insert into t1 values ('a'), ('b'); select c1 from t1 order by c1 limit 1; @@ -493,13 +497,13 @@ drop table t1; # Test RTREE index # --error 1235, 1289 -CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)) ENGINE=MARIA; +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); # INSERT INTO t1 VALUES (1,1),(1,1); # DELETE FROM rt WHERE a<1; # DROP TABLE IF EXISTS t1; -create table t1 (a int, b varchar(200), c text not null) checksum=1 engine=maria; -create table t2 (a int, b varchar(200), c text not null) checksum=0 engine=maria; +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); insert t2 select * from t1; checksum table t1, t2, t3 quick; @@ -508,11 +512,11 @@ checksum table t1, t2, t3 extended; #show table status; drop table t1,t2; -create table t1 (a int, key (a)) engine=maria; +create table t1 (a int, key (a)); show keys from t1; alter table t1 disable keys; show keys from t1; -create table t2 (a int) engine=maria; +create table t2 (a int); let $i=1000; set @@rand_seed1=31415926,@@rand_seed2=2718281828; --disable_query_log @@ -534,7 +538,7 @@ drop table t1,t2; # # index search for NULL in blob. Bug #4816 # -create table t1 ( a tinytext, b char(1), index idx (a(1),b)) engine=maria; +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); insert into t1 values (null,''), (null,''); explain select count(*) from t1 where a is null; select count(*) from t1 where a is null; @@ -544,7 +548,7 @@ drop table t1; # bug9188 - Corruption Can't open file: 'table.MYI' (errno: 145) # create table t1 (c1 int, c2 varchar(4) not null default '', - key(c2(3))) default charset=utf8 engine=maria; + key(c2(3))) default charset=utf8; insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); update t1 set c2='A B' where c1=2; check table t1; @@ -555,11 +559,11 @@ drop table t1; # Bug#12296 - CHECKSUM TABLE reports 0 for the table # This happened if the first record was marked as deleted. # -create table t1 (c1 int) engine=maria; +create table t1 (c1 int); insert into t1 values (1),(2),(3),(4); checksum table t1; delete from t1 where c1 = 1; -create table t2 engine=maria as select * from t1; +create table t2 as select * from t1; # The following returns 0 with the bug in place. checksum table t1; # The above should give the same number as the following. @@ -572,7 +576,7 @@ drop table t1, t2; show variables like 'maria_stats_method'; -create table t1 (a int, key(a)) engine=maria; +create table t1 (a int, key(a)); insert into t1 values (0),(1),(2),(3),(4); insert into t1 select NULL from t1; @@ -623,7 +627,7 @@ show variables like 'maria_stats_method'; create table t1 ( a char(3), b char(4), c char(5), d char(6), key(a,b,c,d) -) engine=maria; +); insert into t1 values ('bcd','def1', NULL, 'zz'); insert into t1 values ('bcd','def2', NULL, 'zz'); insert into t1 values ('bce','def1', 'yuu', NULL); @@ -644,7 +648,7 @@ create table t1( time TIME NOT NULL, score INT NOT NULL DEFAULT 0, bob TINYBLOB -) engine=maria; +); insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), @@ -663,7 +667,7 @@ create table t1 ( t text not null, primary key (id1), key x (id2, t(32)) -) engine=maria; +) engine=maria; # engine clause is redundant but it's to test its parsing insert into t1 (id2, t) values (10, 'abc'), (10, 'abc'), (10, 'abc'), (20, 'abc'), (20, 'abc'), (20, 'def'), @@ -672,51 +676,49 @@ select count(*) from t1 where id2 = 10; select count(id1) from t1 where id2 = 10; drop table t1; -# End of 4.1 tests +# +# BUG##20357 - Got error 124 from storage engine using MIN and MAX functions +# in queries +# +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +DROP TABLE t1; + +# +# BUG#18036 - update of table joined to self reports table as crashed +# +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +DROP TABLE t1; # # Test varchar # -let $default=`select @@storage_engine`; -set storage_engine=MARIA; source include/varchar.inc; # # Some errors/warnings on create # -create table t1 (v varchar(65530), key(v)) engine=maria; +create table t1 (v varchar(65530), key(v)); drop table if exists t1; -create table t1 (v varchar(65536)) engine=maria; +create table t1 (v varchar(65536)); show create table t1; drop table t1; -create table t1 (v varchar(65530) character set utf8) engine=maria; +create table t1 (v varchar(65530) character set utf8); show create table t1; drop table t1; # MARIA specific varchar tests --error 1118 -create table t1 (v varchar(65535)) engine=maria; - -eval set storage_engine=$default; - -# -# Test how DROP TABLE works if the index or data file doesn't exists - -create table t1 (a int) engine=maria; -system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAI ; -drop table if exists t1; -create table t1 (a int) engine=maria; -system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAI ; ---error 1051,6 -drop table t1; -create table t1 (a int) engine=maria; -system rm $MYSQLTEST_VARDIR/master-data/test/t1.MAD ; ---error 1105,6,29 -drop table t1; ---error 1051 -drop table t1; +create table t1 (v varchar(65535)); # # Test concurrent insert @@ -724,7 +726,7 @@ drop table t1; # set @save_concurrent_insert=@@concurrent_insert; set global concurrent_insert=1; -create table t1 (a int) engine=maria; +create table t1 (a int); insert into t1 values (1),(2),(3),(4),(5); lock table t1 read local; connect (con1,localhost,root,,); @@ -749,7 +751,7 @@ drop table t1; disconnect con1; # Same test with dynamic record length -create table t1 (a int, b varchar(30) default "hello") engine=maria; +create table t1 (a int, b varchar(30) default "hello"); insert into t1 (a) values (1),(2),(3),(4),(5); lock table t1 read local; connect (con1,localhost,root,,); @@ -777,7 +779,7 @@ set global concurrent_insert=@save_concurrent_insert; # BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce # different statistics on the same table with NULL values. -create table t1 (a int, key(a)) engine=maria; +create table t1 (a int, key(a)); insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); analyze table t1; @@ -789,14 +791,120 @@ show keys from t1; drop table t1; +# +# Bug#8706 - temporary table with data directory option fails +# +connect (session1,localhost,root,,); +connect (session2,localhost,root,,); + +connection session1; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 9 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection session2; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 99 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection default; +create table t1 (a int) select 42 a; + +connection session1; +select * from t1; +disconnect session1; +connection session2; +select * from t1; +disconnect session2; +connection default; +select * from t1; +drop table t1; + +--echo End of 4.1 tests + # # Bug#10056 - PACK_KEYS option take values greater than 1 while creating table # -create table t1 (c1 int) engine=maria pack_keys=0; -create table t2 (c1 int) engine=maria pack_keys=1; -create table t3 (c1 int) engine=maria pack_keys=default; +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; --error 1064 -create table t4 (c1 int) engine=maria pack_keys=2; +create table t4 (c1 int) pack_keys=2; drop table t1, t2, t3; +--echo End of 5.0 tests + +# +# Test of key_block_size +# + +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a)); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +show create table t1; +alter table t1 key_block_size=2048; +show create table t1; +alter table t1 add c int, add key (c); +show create table t1; +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +drop table t1; + + +# Test limits and errors of key_block_size + +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +drop table t1; + +--error 1064 +create table t1 (a int not null, key key_block_size=1024 (a)); +--error 1064 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); + +--echo End of 5.1 tests + +eval set global storage_engine=$default; + # End of 5.2 tests diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 256f67f6125..3230a669641 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -524,7 +524,6 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARTEXT2: case HA_KEYTYPE_VARBINARY1: - case HA_KEYTYPE_VARTEXT2: case HA_KEYTYPE_VARBINARY2: { int a_length; diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc index 740dfa85a23..aa244450bb5 100644 --- a/sql/ha_maria.cc +++ b/sql/ha_maria.cc @@ -30,8 +30,9 @@ #include "../storage/maria/ma_rt_index.h" #endif +#include + ulong maria_recover_options= HA_RECOVER_NONE; -static bool ha_maria_init(); /* bits in maria_recover_options */ const char *maria_recover_names[]= @@ -60,65 +61,9 @@ TYPELIB maria_stats_method_typelib= ** MARIA tables *****************************************************************************/ -static handler *maria_create_handler(TABLE_SHARE * table); - -/* MARIA handlerton */ -static const char maria_hton_name[]= "MARIA"; -static const char maria_hton_comment[]= - "Transactional storage engine, optimized for long running transactions"; - -handlerton maria_hton= +static handler *maria_create_handler(TABLE_SHARE * table, MEM_ROOT *mem_root) { - MYSQL_HANDLERTON_INTERFACE_VERSION, - maria_hton_name, - SHOW_OPTION_YES, - maria_hton_comment, - DB_TYPE_MARIA, - ha_maria_init, - 0, /* slot */ - 0, /* savepoint size. */ - NULL, /* close_connection */ - NULL, /* savepoint */ - NULL, /* rollback to savepoint */ - NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - /* - MARIA doesn't support transactions yet and doesn't have - transaction-dependent context: cursors can survive a commit. - */ - maria_create_handler, /* Create a new handler */ - NULL, /* Drop a database */ - maria_panic, /* Panic call */ - NULL, /* Start Consistent Snapshot */ - NULL, /* Flush logs */ - NULL, /* Show status */ - NULL, /* Partition flags */ - NULL, /* Alter table flags */ - NULL, /* Alter Tablespace */ - NULL, /* Fill Files Table */ - HTON_CAN_RECREATE, - NULL, /* binlog_func */ - NULL, /* binlog_log_query */ - NULL /* release_temporary_latches */ -}; - - -static bool ha_maria_init() -{ - return test(maria_init()); -} - -static handler *maria_create_handler(TABLE_SHARE *table) -{ - return new ha_maria(table); + return new (mem_root) ha_maria(table); } @@ -208,9 +153,10 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) ha_maria::ha_maria(TABLE_SHARE *table_arg): handler(&maria_hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | - HA_DUPP_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | - HA_FILE_BASED | HA_CAN_GEOMETRY | HA_READ_RND_SAME | - HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD), + HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | + HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1) {} @@ -376,6 +322,7 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) if (table->key_info[i].flags & HA_USES_PARSER) file->s->keyinfo[i].parser= (struct st_mysql_ftparser *) parser->plugin->info; + table->key_info[i].block_size= file->s->keyinfo[i].block_length; } return (0); } @@ -1284,13 +1231,14 @@ int ha_maria::index_read_idx(byte * buf, uint index, const byte * key, int ha_maria::index_read_last(byte * buf, const byte * key, uint key_len) { + DBUG_ENTER("ha_maria::index_read_last"); DBUG_ASSERT(inited == INDEX); statistic_increment(table->in_use->status_var.ha_read_key_count, &LOCK_status); int error= maria_rkey(file, buf, active_index, key, key_len, HA_READ_PREFIX_LAST); table->status= error ? STATUS_NOT_FOUND : 0; - return error; + DBUG_RETURN(error); } @@ -1355,7 +1303,7 @@ int ha_maria::rnd_init(bool scan) { if (scan) return maria_scan_init(file); - return maria_extra(file, HA_EXTRA_RESET, 0); + return maria_reset(file); // Free buffers } @@ -1400,24 +1348,23 @@ void ha_maria::info(uint flag) (void) maria_status(file, &info, flag); if (flag & HA_STATUS_VARIABLE) { - records= info.records; - deleted= info.deleted; - data_file_length= info.data_file_length; - index_file_length= info.index_file_length; - delete_length= info.delete_length; - check_time= info.check_time; - mean_rec_length= info.mean_reclength; + stats.records= info.records; + stats.deleted= info.deleted; + stats.data_file_length= info.data_file_length; + stats.index_file_length= info.index_file_length; + stats.delete_length= info.delete_length; + stats.check_time= info.check_time; + stats.mean_rec_length= info.mean_reclength; } if (flag & HA_STATUS_CONST) { TABLE_SHARE *share= table->s; - max_data_file_length= info.max_data_file_length; - max_index_file_length= info.max_index_file_length; - create_time= info.create_time; - sortkey= info.sortkey; + stats.max_data_file_length= info.max_data_file_length; + stats.max_index_file_length= info.max_index_file_length; + stats.create_time= info.create_time; ref_length= info.reflength; share->db_options_in_use= info.options; - block_size= maria_block_size; + stats.block_size= maria_block_size; /* Update share */ if (share->tmp_table == NO_TMP_TABLE) @@ -1448,11 +1395,11 @@ void ha_maria::info(uint flag) if (flag & HA_STATUS_ERRKEY) { errkey= info.errkey; - my_store_ptr(dupp_ref, ref_length, info.dupp_key_pos); + my_store_ptr(dup_ref, ref_length, info.dupp_key_pos); } /* Faster to always update, than to do it based on flag */ - update_time= info.update_time; - auto_increment_value= info.auto_increment; + stats.update_time= info.update_time; + stats.auto_increment_value= info.auto_increment; } @@ -1463,6 +1410,10 @@ int ha_maria::extra(enum ha_extra_function operation) return maria_extra(file, operation, 0); } +int ha_maria::reset(void) +{ + return maria_reset(file); +} /* To be used with WRITE_CACHE and EXTRA_CACHE */ @@ -1510,7 +1461,7 @@ void ha_maria::update_create_info(HA_CREATE_INFO *create_info) ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST); if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { - create_info->auto_increment_value= auto_increment_value; + create_info->auto_increment_value= stats.auto_increment_value; } create_info->data_file_name= data_file_name; create_info->index_file_name= index_file_name; @@ -1552,6 +1503,8 @@ int ha_maria::create(const char *name, register TABLE *table_arg, keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : pos->algorithm; + keydef[i].block_length= pos->block_size; + keydef[i].seg= keyseg; keydef[i].keysegs= pos->key_parts; for (j= 0; j < pos->key_parts; j++) @@ -1731,7 +1684,10 @@ int ha_maria::rename_table(const char *from, const char *to) } -ulonglong ha_maria::get_auto_increment() +void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) { ulonglong nr; int error; @@ -1740,7 +1696,10 @@ ulonglong ha_maria::get_auto_increment() if (!table->s->next_number_key_offset) { // Autoincrement at key-start ha_maria::info(HA_STATUS_AUTO); - return auto_increment_value; + *first_value= stats.auto_increment_value; + /* Maria has only table-level lock for now, so reserves to +inf */ + *nb_reserved_values= ULONGLONG_MAX; + return; } /* it's safe to call the following if bulk_insert isn't on */ @@ -1761,7 +1720,14 @@ ulonglong ha_maria::get_auto_increment() val_int_offset(table->s->rec_buff_length) + 1); } extra(HA_EXTRA_NO_KEYREAD); - return nr; + *first_value= nr; + /* + MySQL needs to call us for next row: assume we are inserting ("a",null) + here, we return 3, and next this statement will want to insert ("b",null): + there is no reason why ("b",3+1) would be the good row to insert: maybe it + already exists, maybe 3+1 is too large... + */ + *nb_reserved_values= 1; } @@ -1825,9 +1791,11 @@ bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, { uint options= table->s->db_options_in_use; - if (info->auto_increment_value != auto_increment_value || + if (info->auto_increment_value != stats.auto_increment_value || info->data_file_name != data_file_name || - info->index_file_name != index_file_name || table_changes == IS_EQUAL_NO) + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO || + table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet return COMPATIBLE_DATA_NO; if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | @@ -1838,14 +1806,29 @@ bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, return COMPATIBLE_DATA_YES; } +handlerton maria_hton; + +static int ha_maria_init() +{ + maria_hton.state=SHOW_OPTION_YES; + maria_hton.db_type=DB_TYPE_MARIA; + maria_hton.create=maria_create_handler; + maria_hton.panic=maria_panic; + maria_hton.flags=HTON_CAN_RECREATE; + return test(maria_init()); +} + +struct st_mysql_storage_engine maria_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION, &maria_hton }; + mysql_declare_plugin(maria) { MYSQL_STORAGE_ENGINE_PLUGIN, - &maria_hton, - maria_hton_name, + &maria_storage_engine, + "Maria", "MySQL AB", - maria_hton_comment, - NULL, /* Plugin Init */ + "Traditional transactional MySQL tables", + ha_maria_init, /* Plugin Init */ NULL, /* Plugin Deinit */ 0x0100, /* 1.0 */ 0 diff --git a/sql/ha_maria.h b/sql/ha_maria.h index 5387820edc6..cdd2305f6a1 100644 --- a/sql/ha_maria.h +++ b/sql/ha_maria.h @@ -49,7 +49,7 @@ public: { return "MARIA"; } const char *index_type(uint key_number); const char **bas_ext() const; - ulong table_flags() const + ulonglong table_flags() const { return int_table_flags; } ulong index_flags(uint inx, uint part, bool all_parts) const { @@ -106,6 +106,7 @@ public: void info(uint); int extra(enum ha_extra_function operation); int extra_opt(enum ha_extra_function operation, ulong cache_size); + int reset(void); int external_lock(THD * thd, int lock_type); int delete_all_rows(void); int disable_indexes(uint mode); @@ -118,7 +119,10 @@ public: int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, enum thr_lock_type lock_type); - ulonglong get_auto_increment(); + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); int rename_table(const char *from, const char *to); int delete_table(const char *name); int check(THD * thd, HA_CHECK_OPT * check_opt); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index b1ff5bdc54d..a666bec32d7 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -6021,7 +6021,6 @@ log and this option does nothing anymore.", (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #endif - {"max_allowed_packet", OPT_MAX_ALLOWED_PACKET, "Max packetlength to send/receive from to server.", (gptr*) &global_system_variables.max_allowed_packet, diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 4132a4208a2..d4315b4d446 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c cmakelists.txt +EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt pkgdata_DATA = ma_test_all ma_test_all.res INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 4af0f955b8b..69d863e6366 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -359,7 +359,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) puts("- check key delete-chain"); param->key_file_blocks=info->s->base.keystart; - for (key=0 ; key < info->s->state.header.max_block_size ; key++) + for (key=0 ; key < info->s->state.header.max_block_size_index ; key++) if (check_k_link(param,info,key)) { if (param->testflag & T_VERBOSE) puts(""); @@ -454,25 +454,24 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) if ((uint) share->base.auto_key -1 == key) { /* Check that auto_increment key is bigger than max key value */ - ulonglong save_auto_value=info->s->state.auto_increment; - info->s->state.auto_increment=0; + ulonglong auto_increment; info->lastinx=key; _ma_read_key_record(info, 0L, info->rec_buff); - _ma_update_auto_increment(info, info->rec_buff); - if (info->s->state.auto_increment > save_auto_value) + auto_increment= ma_retrieve_auto_increment(info, info->rec_buff); + if (auto_increment > info->s->state.auto_increment) { _ma_check_print_warning(param, "Auto-increment value: %s is smaller than max used value: %s", - llstr(save_auto_value,buff2), - llstr(info->s->state.auto_increment, buff)); + llstr(info->s->state.auto_increment,buff2), + llstr(auto_increment, buff)); } if (param->testflag & T_AUTO_INC) { - set_if_bigger(info->s->state.auto_increment, - param->auto_increment_value); + set_if_bigger(info->s->state.auto_increment, + auto_increment); + set_if_bigger(info->s->state.auto_increment, + param->auto_increment_value); } - else - info->s->state.auto_increment=save_auto_value; /* Check that there isn't a row with auto_increment = 0 in the table */ maria_extra(info,HA_EXTRA_KEYREAD,0); @@ -1160,7 +1159,7 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) #ifdef HAVE_RTREE_KEYS (keyinfo->flag & HA_SPATIAL) ? maria_rtree_find_first(info, key, info->lastkey, key_length, - SEARCH_SAME) : + MBR_EQUAL | MBR_DATA) : #endif _ma_search(info,keyinfo,info->lastkey,key_length, SEARCH_SAME, info->s->state.key_root[key]); @@ -1412,7 +1411,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, share->state.key_root[i]= HA_OFFSET_ERROR; /* Drop the delete chain. */ - for (i=0 ; i < share->state.header.max_block_size ; i++) + for (i=0 ; i < share->state.header.max_block_size_index ; i++) share->state.key_del[i]= HA_OFFSET_ERROR; /* @@ -1796,7 +1795,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); for (key=0 ; key < info->s->base.keys ; key++) info->s->state.key_root[key]=index_pos[key]; - for (key=0 ; key < info->s->state.header.max_block_size ; key++) + for (key=0 ; key < info->s->state.header.max_block_size_index ; key++) info->s->state.key_del[key]= HA_OFFSET_ERROR; info->s->state.changed&= ~STATE_NOT_SORTED_PAGES; @@ -2079,7 +2078,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; - for (i=0 ; i < share->state.header.max_block_size ; i++) + for (i=0 ; i < share->state.header.max_block_size_index ; i++) share->state.key_del[i]= HA_OFFSET_ERROR; info->state->key_file_length=share->base.keystart; } @@ -2101,6 +2100,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); sort_param.wordlist=NULL; + init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); if (share->data_file_type == DYNAMIC_RECORD) length=max(share->base.min_pack_length+1,share->base.min_block_length); @@ -2163,12 +2163,36 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, { uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* sort_param.keyinfo->seg->charset->mbmaxlen; - sort_info.max_records= - (ha_rows) (sort_info.filelength/ft_min_word_len+1); + sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + /* + fulltext indexes may have much more entries than the + number of rows in the table. We estimate the number here. + + Note, built-in parser is always nr. 0 - see ftparser_call_initializer() + */ + if (sort_param.keyinfo->ftparser_nr == 0) + { + /* + for built-in parser the number of generated index entries + cannot be larger than the size of the data file divided + by the minimal word's length + */ + sort_info.max_records= + (ha_rows) (sort_info.filelength/ft_min_word_len+1); + } + else + { + /* + for external plugin parser we cannot tell anything at all :( + so, we'll use all the sort memory and start from ~10 buffpeks. + (see _create_index_by_sort) + */ + sort_info.max_records= + 10*param->sort_buffer_length/sort_param.key_length; + } sort_param.key_read=sort_maria_ft_key_read; sort_param.key_write=sort_maria_ft_key_write; - sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; } else { @@ -2184,6 +2208,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, goto err; } param->calc_checksum=0; /* No need to calc glob_crc */ + free_root(&sort_param.wordroot, MYF(0)); /* Set for next loop */ sort_info.max_records= (ha_rows) info->state->records; @@ -2447,7 +2472,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; - for (i=0 ; i < share->state.header.max_block_size ; i++) + for (i=0 ; i < share->state.header.max_block_size_index ; i++) share->state.key_del[i]= HA_OFFSET_ERROR; info->state->key_file_length=share->base.keystart; } @@ -2573,6 +2598,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* sort_param[i].keyinfo->seg->charset->mbmaxlen; sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); } } sort_info.total_keys=i; @@ -2794,10 +2820,12 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) { for (;;) { - my_free((char*) wptr, MYF(MY_ALLOW_ZERO_PTR)); + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); if ((error=sort_get_next_record(sort_param))) DBUG_RETURN(error); - if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record))) + if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record, + &sort_param->wordroot))) + DBUG_RETURN(1); if (wptr->pos) break; @@ -2821,7 +2849,7 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) #endif if (!wptr->pos) { - my_free((char*) sort_param->wordlist, MYF(0)); + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); sort_param->wordlist=0; error=_ma_sort_write_record(sort_param); } @@ -3784,6 +3812,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) ha_rows max_records; ulonglong file_length,tmp_length; MARIA_CREATE_INFO create_info; + DBUG_ENTER("maria_recreate_table"); error=1; /* Default error */ info= **org_info; @@ -3793,7 +3822,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && (param->testflag & T_UNPACK); if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF)*share.base.keys))) - return 0; + DBUG_RETURN(0); memcpy((byte*) keyinfo,(byte*) share.keyinfo, (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys)); @@ -3802,14 +3831,14 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) (key_parts+share.base.keys)))) { my_afree((gptr) keyinfo); - return 1; + DBUG_RETURN(1); } if (!(recdef=(MARIA_COLUMNDEF*) my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)))) { my_afree((gptr) keyinfo); my_afree((gptr) keysegs); - return 1; + DBUG_RETURN(1); } if (!(uniquedef=(MARIA_UNIQUEDEF*) my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1)))) @@ -3817,7 +3846,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) my_afree((gptr) recdef); my_afree((gptr) keyinfo); my_afree((gptr) keysegs); - return 1; + DBUG_RETURN(1); } /* Copy the column definitions */ @@ -3887,6 +3916,11 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) create_info.language = (param->language ? param->language : share.state.header.language); create_info.key_file_length= status_info.key_file_length; + /* + Allow for creating an auto_increment key. This has an effect only if + an auto_increment key exists in the original table. + */ + create_info.with_auto_increment= TRUE; /* We don't have to handle symlinks here because we are using HA_DONT_TOUCH_DATA */ if (maria_create(filename, @@ -3931,7 +3965,7 @@ end: my_afree((gptr) keyinfo); my_afree((gptr) recdef); my_afree((gptr) keysegs); - return error; + DBUG_RETURN(error); } @@ -4034,6 +4068,8 @@ void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, my_bool repair_only) { byte *record; + DBUG_ENTER("update_auto_increment_key"); + if (!info->s->base.auto_key || ! maria_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1)) { @@ -4041,7 +4077,7 @@ void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, _ma_check_print_info(param, "Table: %s doesn't have an auto increment key\n", param->isam_file_name); - return; + DBUG_VOID_RETURN; } if (!(param->testflag & T_SILENT) && !(param->testflag & T_REP)) @@ -4054,7 +4090,7 @@ void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, MYF(0)))) { _ma_check_print_error(param,"Not enough memory for extra record"); - return; + DBUG_VOID_RETURN; } maria_extra(info,HA_EXTRA_KEYREAD,0); @@ -4065,23 +4101,22 @@ void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, maria_extra(info,HA_EXTRA_NO_KEYREAD,0); my_free((char*) record, MYF(0)); _ma_check_print_error(param,"%d when reading last record",my_errno); - return; + DBUG_VOID_RETURN; } if (!repair_only) info->s->state.auto_increment=param->auto_increment_value; } else { - ulonglong auto_increment= (repair_only ? info->s->state.auto_increment : - param->auto_increment_value); - info->s->state.auto_increment=0; - _ma_update_auto_increment(info, record); + ulonglong auto_increment= ma_retrieve_auto_increment(info, record); set_if_bigger(info->s->state.auto_increment,auto_increment); + if (!repair_only) + set_if_bigger(info->s->state.auto_increment, param->auto_increment_value); } maria_extra(info,HA_EXTRA_NO_KEYREAD,0); my_free((char*) record, MYF(0)); maria_update_state_info(param, info, UPDATE_AUTO_INC); - return; + DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index b15b5b0ae02..b9fb4eb0d5b 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -28,9 +28,9 @@ #endif #include - /* - ** Old options is used when recreating database, from isamchk - */ +/* + Old options is used when recreating database, from maria_chk +*/ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, uint columns, MARIA_COLUMNDEF *recinfo, @@ -45,6 +45,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, key_length,info_length,key_segs,options,min_key_length_skip, base_pos,long_varchar_count,varchar_length, max_key_block_length,unique_key_parts,fulltext_keys,offset; + uint aligned_key_start, block_length; ulong reclength, real_reclength,min_pack_length; char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; @@ -59,6 +60,8 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; MARIA_CREATE_INFO tmp_create_info; DBUG_ENTER("maria_create"); + DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", + keys, columns, uniques, flags)); if (!ci) { @@ -428,8 +431,16 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, key_segs) share.state.rec_per_key_part[key_segs-1]=1L; length+=key_length; + /* Get block length for key, if defined by user */ + block_length= (keydef->block_length ? + my_round_up_to_next_power(keydef->block_length) : + maria_block_size); + block_length= max(block_length, MARIA_MIN_KEY_BLOCK_LENGTH); + block_length= min(block_length, MARIA_MAX_KEY_BLOCK_LENGTH); + keydef->block_length= MARIA_BLOCK_SIZE(length-real_length_diff, - pointer,MARIA_MAX_KEYPTR_SIZE); + pointer,MARIA_MAX_KEYPTR_SIZE, + block_length); if (keydef->block_length > MARIA_MAX_KEY_BLOCK_LENGTH || length >= HA_MAX_KEY_BUFF) { @@ -474,6 +485,17 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ columns*MARIA_COLUMNDEF_SIZE); + DBUG_PRINT("info", ("info_length: %u", info_length)); + /* There are only 16 bits for the total header length. */ + if (info_length > 65535) + { + my_printf_error(0, "Maria table '%s' has too many columns and/or " + "indexes and/or unique constraints.", + MYF(0), name + dirname_length(name)); + my_errno= HA_WRONG_CREATE_OPTION; + goto err; + } + bmove(share.state.header.file_version,(byte*) maria_file_magic,4); ci->old_options=options| (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ? HA_OPTION_COMPRESS_RECORD | @@ -485,7 +507,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, mi_int2store(share.state.header.base_pos,base_pos); share.state.header.language= (ci->language ? ci->language : default_charset_info->number); - share.state.header.max_block_size=max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH; + share.state.header.max_block_size_index= max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH; share.state.dellink = HA_OFFSET_ERROR; share.state.process= (ulong) getpid(); @@ -512,8 +534,12 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, mi_int2store(share.state.header.unique_key_parts,unique_key_parts); maria_set_all_keys_active(share.state.key_map, keys); + aligned_key_start= my_round_up_to_next_power(max_key_block_length ? + max_key_block_length : + maria_block_size); + share.base.keystart = share.state.state.key_file_length= - MY_ALIGN(info_length, maria_block_size); + MY_ALIGN(info_length, aligned_key_start); share.base.max_key_block_length=max_key_block_length; share.base.max_key_length=ALIGN_SIZE(max_key_length+4); share.base.records=ci->max_rows; @@ -549,9 +575,21 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, { char *iext= strrchr(ci->index_file_name, '.'); int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT); - - fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT, - MY_UNPACK_FILENAME| (have_iext ? MY_REPLACE_EXT :MY_APPEND_EXT)); + if (options & HA_OPTION_TMP_TABLE) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->index_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT : + MY_APPEND_EXT)); + } fn_format(linkname, name, "", MARIA_NAME_IEXT, MY_UNPACK_FILENAME|MY_APPEND_EXT); linkname_ptr=linkname; @@ -614,9 +652,21 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, char *dext= strrchr(ci->data_file_name, '.'); int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); - fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | - (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); + if (options & HA_OPTION_TMP_TABLE) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); + } fn_format(linkname, name, "",MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT); linkname_ptr=linkname; @@ -636,7 +686,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } errpos=3; } - + DBUG_PRINT("info", ("write state info and base info")); if (_ma_state_info_write(file, &share.state, 2) || _ma_base_info_write(file, &share.base)) goto err; @@ -650,6 +700,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, #endif /* Write key and keyseg definitions */ + DBUG_PRINT("info", ("write key and keyseg definitions")); for (i=0 ; i < share.base.keys - uniques; i++) { uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0; @@ -700,6 +751,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } /* Save unique definition */ + DBUG_PRINT("info", ("write unique definitions")); for (i=0 ; i < share.state.header.uniques ; i++) { HA_KEYSEG *keyseg_end; @@ -730,6 +782,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, goto err; } } + DBUG_PRINT("info", ("write field definitions")); for (i=0 ; i < share.base.fields ; i++) if (_ma_recinfo_write(file, &recinfo[i])) goto err; @@ -744,6 +797,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, #endif /* Enlarge files */ + DBUG_PRINT("info", ("enlarge to keystart: %lu", (ulong) share.base.keystart)); if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) goto err; @@ -772,7 +826,7 @@ err: VOID(my_close(dfile,MYF(0))); /* fall through */ case 2: - /* QQ: Tõnu should add a call to my_raid_delete() here */ + /* QQ: Tõnu should add a call to my_raid_delete() here */ if (! (flags & HA_DONT_TOUCH_DATA)) my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT), diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index a9af9a62c99..dd781a93fc4 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -34,12 +34,24 @@ int maria_delete_table(const char *name) #ifdef USE_RAID { MARIA_HA *info; - /* we use 'open_for_repair' to be able to delete a crashed table */ - if (!(info=maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) - DBUG_RETURN(my_errno); - raid_type = info->s->base.raid_type; - raid_chunks = info->s->base.raid_chunks; - maria_close(info); + /* + When built with RAID support, we need to determine if this table + makes use of the raid feature. If yes, we need to remove all raid + chunks. This is done with my_raid_delete(). Unfortunately it is + necessary to open the table just to check this. We use + 'open_for_repair' to be able to open even a crashed table. If even + this open fails, we assume no raid configuration for this table + and try to remove the normal data file only. This may however + leave the raid chunks behind. + */ + if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) + raid_type= 0; + else + { + raid_type= info->s->base.raid_type; + raid_chunks= info->s->base.raid_chunks; + maria_close(info); + } } #ifdef EXTRA_DEBUG _ma_check_table_is_closed(name,"delete"); diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 18efc0adbd0..047826408c3 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -67,6 +67,11 @@ static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size) { DBUG_ENTER("_ma_dynmap_file"); + if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning", ("File is too large for mmap")); + DBUG_RETURN(1); + } info->s->file_map= (byte*) my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN), info->s->mode==O_RDONLY ? PROT_READ : @@ -1324,6 +1329,9 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) info->rec_cache.pos_in_file <= block_info.next_filepos && flush_io_cache(&info->rec_cache)) goto err; + /* A corrupted table can have wrong pointers. (Bug# 19835) */ + if (block_info.next_filepos == HA_OFFSET_ERROR) + goto panic; info->rec_cache.seek_not_done=1; if ((b_type= _ma_get_block_info(&block_info,file, block_info.next_filepos)) diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index 06a36cba238..d600fedb99b 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -47,29 +47,6 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg DBUG_PRINT("enter",("function: %d",(int) function)); switch (function) { - case HA_EXTRA_RESET: - /* - Free buffers and reset the following flags: - EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK - - If the row buffer cache is large (for dynamic tables), reduce it - to save memory. - */ - if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) - { - info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); - error=end_io_cache(&info->rec_cache); - } - if (share->base.blobs) - _ma_alloc_rec_buff(info, -1, &info->rec_buff); -#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) - if (info->opt_flag & MEMMAP_USED) - madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); -#endif - info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); - info->quick_mode=0; - /* Fall through */ - case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ info->lastinx= 0; /* Use first index as def */ info->last_search_keypage=info->lastpos= HA_OFFSET_ERROR; @@ -423,3 +400,37 @@ static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function) } } } + + +int maria_reset(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_reset"); + /* + Free buffers and reset the following flags: + EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK + + If the row buffer cache is large (for dynamic tables), reduce it + to save memory. + */ + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error= end_io_cache(&info->rec_cache); + } + if (share->base.blobs) + _ma_alloc_rec_buff(info, -1, &info->rec_buff); +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + info->quick_mode=0; + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage= info->lastpos= HA_OFFSET_ERROR; + info->page_changed= 1; + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + DBUG_RETURN(error); +} diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c index 2b8e0d8b97a..83901cb5e47 100644 --- a/storage/maria/ma_ft_boolean_search.c +++ b/storage/maria/ma_ft_boolean_search.c @@ -167,10 +167,11 @@ typedef struct st_my_ftb_param } MY_FTB_PARAM; -static int ftb_query_add_word(void *param, char *word, int word_len, +static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *info) { - MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param; + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; FTB_WORD *ftbw; FTB_EXPR *ftbe, *tmp_expr; FT_WORD *phrase_word; @@ -268,9 +269,10 @@ static int ftb_query_add_word(void *param, char *word, int word_len, } -static int ftb_parse_query_internal(void *param, char *query, int len) +static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param, + char *query, int len) { - MY_FTB_PARAM *ftb_param= (MY_FTB_PARAM *)param; + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; MYSQL_FTPARSER_BOOLEAN_INFO info; CHARSET_INFO *cs= ftb_param->ftb->charset; char **start= &query; @@ -280,7 +282,7 @@ static int ftb_parse_query_internal(void *param, char *query, int len) info.prev= ' '; info.quot= 0; while (maria_ft_get_word(cs, start, end, &w, &info)) - ftb_query_add_word(param, w.pos, w.len, &info); + param->mysql_add_word(param, w.pos, w.len, &info); return(0); } @@ -295,20 +297,21 @@ static void _ftb_parse_query(FTB *ftb, byte *query, uint len, if (ftb->state != UNINITIALIZED) DBUG_VOID_RETURN; + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) + DBUG_VOID_RETURN; ftb_param.ftb= ftb; ftb_param.depth= 0; ftb_param.ftbe= ftb->root; ftb_param.up_quot= 0; - if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) - DBUG_VOID_RETURN; param->mysql_parse= ftb_parse_query_internal; param->mysql_add_word= ftb_query_add_word; param->mysql_ftparam= (void *)&ftb_param; param->cs= ftb->charset; param->doc= query; param->length= len; + param->flags= 0; param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO; parser->parse(param); DBUG_VOID_RETURN; @@ -577,10 +580,11 @@ typedef struct st_my_ftb_phrase_param } MY_FTB_PHRASE_PARAM; -static int ftb_phrase_add_word(void *param, char *word, int word_len, +static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) { - MY_FTB_PHRASE_PARAM *phrase_param= (MY_FTB_PHRASE_PARAM *)param; + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; FT_WORD *w= (FT_WORD *)phrase_param->document->data; LIST *phrase, *document; w->pos= word; @@ -608,14 +612,15 @@ static int ftb_phrase_add_word(void *param, char *word, int word_len, } -static int ftb_check_phrase_internal(void *param, char *document, int len) +static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param, + char *document, int len) { FT_WORD word; - MY_FTB_PHRASE_PARAM *phrase_param= (MY_FTB_PHRASE_PARAM *)param; + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; const char *docend= document + len; while (maria_ft_simple_get_word(phrase_param->cs, &document, docend, &word, FALSE)) { - ftb_phrase_add_word(param, word.pos, word.len, 0); + param->mysql_add_word(param, word.pos, word.len, 0); if (phrase_param->match) return 1; } @@ -644,7 +649,8 @@ static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len, MYSQL_FTPARSER_PARAM *param; DBUG_ENTER("_ftb_check_phrase"); DBUG_ASSERT(parser); - if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) + + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1))) DBUG_RETURN(0); ftb_param.phrase= ftbe->phrase; ftb_param.document= ftbe->document; @@ -659,6 +665,7 @@ static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len, param->cs= ftb->charset; param->doc= (byte *)document; param->length= len; + param->flags= 0; param->mode= MYSQL_FTPARSER_WITH_STOPWORDS; parser->parse(param); DBUG_RETURN(ftb_param.match ? 1 : 0); @@ -819,10 +826,11 @@ typedef struct st_my_ftb_find_param } MY_FTB_FIND_PARAM; -static int ftb_find_relevance_add_word(void *param, char *word, int len, +static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) { - MY_FTB_FIND_PARAM *ftb_param= (MY_FTB_FIND_PARAM *)param; + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; FT_INFO *ftb= ftb_param->ftb; FTB_WORD *ftbw; int a, b, c; @@ -852,13 +860,15 @@ static int ftb_find_relevance_add_word(void *param, char *word, int len, } -static int ftb_find_relevance_parse(void *param, char *doc, int len) +static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param, + char *doc, int len) { - FT_INFO *ftb= ((MY_FTB_FIND_PARAM *)param)->ftb; + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; + FT_INFO *ftb= ftb_param->ftb; char *end= doc + len; FT_WORD w; while (maria_ft_simple_get_word(ftb->charset, &doc, end, &w, TRUE)) - ftb_find_relevance_add_word(param, w.pos, w.len, 0); + param->mysql_add_word(param, w.pos, w.len, 0); return(0); } @@ -878,7 +888,7 @@ float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) return -2.0; if (!ftb->queue.elements) return 0; - if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr))) + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) return 0; if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos) @@ -904,17 +914,17 @@ float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) ftb_param.ftb= ftb; ftb_param.ftsi= &ftsi2; + param->mysql_parse= ftb_find_relevance_parse; + param->mysql_add_word= ftb_find_relevance_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->flags= 0; + param->cs= ftb->charset; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + while (_ma_ft_segiterator(&ftsi)) { if (!ftsi.pos) continue; - /* Since subsequent call to _ftb_check_phrase overwrites param elements, - it must be reinitialized at each iteration _inside_ the loop. */ - param->mysql_parse= ftb_find_relevance_parse; - param->mysql_add_word= ftb_find_relevance_add_word; - param->mysql_ftparam= (void *)&ftb_param; - param->cs= ftb->charset; - param->mode= MYSQL_FTPARSER_SIMPLE_MODE; param->doc= (byte *)ftsi.pos; param->length= ftsi.len; parser->parse(param); diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c index b9b496fc268..fe4900aeb64 100644 --- a/storage/maria/ma_ft_eval.c +++ b/storage/maria/ma_ft_eval.c @@ -55,6 +55,7 @@ int main(int argc, char *argv[]) /* Define a key over the first column */ keyinfo[0].seg=keyseg; keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT; keyinfo[0].seg[0].flag= HA_BLOB_PART; keyinfo[0].seg[0].start=recinfo[0].length; diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c index a9741787fc9..993857aecbb 100644 --- a/storage/maria/ma_ft_nlq_search.c +++ b/storage/maria/ma_ft_nlq_search.c @@ -226,7 +226,7 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, aio.charset=info->s->keyinfo[keynr].seg->charset; aio.keybuff=info->lastkey+info->s->base.max_key_length; parser= info->s->keyinfo[keynr].parser; - if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr))) + if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0))) goto err; bzero(&wtree,sizeof(wtree)); @@ -235,7 +235,9 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, NULL, NULL); maria_ft_parse_init(&wtree, aio.charset); - if (maria_ft_parse(&wtree, query, query_len, 0, parser, ftparser_param)) + ftparser_param->flags= 0; + if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param, + &wtree.mem_root)) goto err; if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, @@ -255,7 +257,9 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, if (!(*info->read_record)(info,docid,record)) { info->update|= HA_STATE_AKTIV; - _ma_ft_parse(&wtree, info, keynr, record, 1, ftparser_param); + ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY; + _ma_ft_parse(&wtree, info, keynr, record, ftparser_param, + &wtree.mem_root); } } delete_queue(&best); diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c index 983bebf3562..1c6e0267d53 100644 --- a/storage/maria/ma_ft_parser.c +++ b/storage/maria/ma_ft_parser.c @@ -28,7 +28,7 @@ typedef struct st_maria_ft_docstat { typedef struct st_my_maria_ft_parser_param { TREE *wtree; - my_bool with_alloc; + MEM_ROOT *mem_root; } MY_FT_PARSER_PARAM; @@ -48,14 +48,14 @@ static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) /* transforms tree of words into the array, applying normalization */ -FT_WORD * maria_ft_linearize(TREE *wtree) +FT_WORD * maria_ft_linearize(TREE *wtree, MEM_ROOT *mem_root) { FT_WORD *wlist,*p; FT_DOCSTAT docstat; DBUG_ENTER("maria_ft_linearize"); - if ((wlist=(FT_WORD *) my_malloc(sizeof(FT_WORD)* - (1+wtree->elements_in_tree),MYF(0)))) + if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)* + (1+wtree->elements_in_tree)))) { docstat.list=wlist; docstat.uniq=wtree->elements_in_tree; @@ -114,6 +114,7 @@ byte maria_ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param) { byte *doc=*start; + int ctype; uint mwc, length, mbl; param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); @@ -122,9 +123,11 @@ byte maria_ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, while (doc 0 ? mbl : 1)) { - if (true_word_char(cs,*doc)) break; + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; if (*doc == FTB_RQUOT && param->quot) { param->quot=doc; @@ -158,14 +161,16 @@ byte maria_ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, } mwc=length=0; - for (word->pos=doc; docpos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) mwc=0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; - + } param->prev='A'; /* be sure *prev is true_word_char */ word->len= (uint)(doc-word->pos) - mwc; if ((param->trunc=(doc 0 ? mbl : 1)) { - if (doc >= end) DBUG_RETURN(0); - if (true_word_char(cs, *doc)) break; + if (doc >= end) + DBUG_RETURN(0); + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; } mwc= length= 0; - for (word->pos=doc; docpos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) mwc= 0; else if (!misc_word_char(*doc) || mwc) break; else mwc++; + } word->len= (uint)(doc-word->pos) - mwc; @@ -241,19 +253,20 @@ void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs) } -static int maria_ft_add_word(void *param, byte *word, uint word_len, +static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) { TREE *wtree; FT_WORD w; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; DBUG_ENTER("maria_ft_add_word"); - wtree= ((MY_FT_PARSER_PARAM *)param)->wtree; - if (((MY_FT_PARSER_PARAM *)param)->with_alloc) + wtree= ft_param->wtree; + if (param->flags & MYSQL_FTFLAGS_NEED_COPY) { byte *ptr; - /* allocating the data in the tree - to avoid mallocs and frees */ DBUG_ASSERT(wtree->with_delete == 0); - ptr= (byte *)alloc_root(&wtree->mem_root, word_len); + ptr= (byte *)alloc_root(ft_param->mem_root, word_len); memcpy(ptr, word, word_len); w.pos= ptr; } @@ -269,30 +282,31 @@ static int maria_ft_add_word(void *param, byte *word, uint word_len, } -static int maria_ft_parse_internal(void *param, byte *doc, uint doc_len) +static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param, + byte *doc, int doc_len) { byte *end=doc+doc_len; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; + TREE *wtree= ft_param->wtree; FT_WORD w; - TREE *wtree; DBUG_ENTER("maria_ft_parse_internal"); - wtree= ((MY_FT_PARSER_PARAM *)param)->wtree; while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) - if (maria_ft_add_word(param, w.pos, w.len, 0)) + if (param->mysql_add_word(param, w.pos, w.len, 0)) DBUG_RETURN(1); DBUG_RETURN(0); } -int maria_ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc, +int maria_ft_parse(TREE *wtree, byte *doc, int doclen, struct st_mysql_ftparser *parser, - MYSQL_FTPARSER_PARAM *param) + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) { MY_FT_PARSER_PARAM my_param; DBUG_ENTER("maria_ft_parse"); DBUG_ASSERT(parser); my_param.wtree= wtree; - my_param.with_alloc= with_alloc; + my_param.mem_root= mem_root; param->mysql_parse= maria_ft_parse_internal; param->mysql_add_word= maria_ft_add_word; @@ -305,7 +319,9 @@ int maria_ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc, } -MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, uint keynr) +#define MAX_PARAM_NR 2 +MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr, uint paramnr) { uint32 ftparser_nr; struct st_mysql_ftparser *parser; @@ -344,9 +360,16 @@ MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, uint keynr } info->s->ftparsers= ftparsers; } + /* + We have to allocate two MYSQL_FTPARSER_PARAM structures per plugin + because in a boolean search a parser is called recursively + ftb_find_relevance* calls ftb_check_phrase* + (MAX_PARAM_NR=2) + */ info->ftparser_param= (MYSQL_FTPARSER_PARAM *) - my_malloc(sizeof(MYSQL_FTPARSER_PARAM) * + my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) * info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL)); + init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); if (! info->ftparser_param) return 0; } @@ -360,6 +383,8 @@ MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, uint keynr ftparser_nr= info->s->keyinfo[keynr].ftparser_nr; parser= info->s->keyinfo[keynr].parser; } + DBUG_ASSERT(paramnr < MAX_PARAM_NR); + ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr; if (! info->ftparser_param[ftparser_nr].mysql_add_word) { /* Note, that mysql_add_word is used here as a flag: @@ -376,19 +401,25 @@ MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, uint keynr void maria_ftparser_call_deinitializer(MARIA_HA *info) { - uint i, keys= info->s->state.header.keys; + uint i, j, keys= info->s->state.header.keys; + free_root(&info->ft_memroot, MYF(0)); if (! info->ftparser_param) return; for (i= 0; i < keys; i++) { MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; - MYSQL_FTPARSER_PARAM *ftparser_param= - &info->ftparser_param[keyinfo->ftparser_nr]; - if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word) + for (j=0; j < MAX_PARAM_NR; j++) { - if (keyinfo->parser->deinit) - keyinfo->parser->deinit(ftparser_param); - ftparser_param->mysql_add_word= 0; + MYSQL_FTPARSER_PARAM *ftparser_param= + &info->ftparser_param[keyinfo->ftparser_nr*MAX_PARAM_NR + j]; + if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word) + { + if (keyinfo->parser->deinit) + keyinfo->parser->deinit(ftparser_param); + ftparser_param->mysql_add_word= 0; + } + else + break; } } } diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c index 2880f6bcdc1..595c31e774c 100644 --- a/storage/maria/ma_ft_test1.c +++ b/storage/maria/ma_ft_test1.c @@ -90,6 +90,7 @@ static int run_test(const char *filename) /* Define a key over the first column */ keyinfo[0].seg=keyseg; keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ keyinfo[0].seg[0].type= key_type; keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART: (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c index c9e2112578c..965f9afc91d 100644 --- a/storage/maria/ma_ft_update.c +++ b/storage/maria/ma_ft_update.c @@ -95,9 +95,8 @@ uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi) /* parses a document i.e. calls maria_ft_parse for every keyseg */ -uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, - const byte *record, my_bool with_alloc, - MYSQL_FTPARSER_PARAM *param) +uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const byte *record, + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) { FT_SEG_ITERATOR ftsi; struct st_mysql_ftparser *parser; @@ -110,25 +109,27 @@ uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, while (_ma_ft_segiterator(&ftsi)) { if (ftsi.pos) - if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc, parser, - param)) + if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, parser, param, + mem_root)) DBUG_RETURN(1); } DBUG_RETURN(0); } -FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record) +FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record, + MEM_ROOT *mem_root) { TREE ptree; MYSQL_FTPARSER_PARAM *param; DBUG_ENTER("_ma_ft_parserecord"); - if (! (param= maria_ftparser_call_initializer(info, keynr))) + if (! (param= maria_ftparser_call_initializer(info, keynr, 0))) DBUG_RETURN(NULL); bzero((char*) &ptree, sizeof(ptree)); - if (_ma_ft_parse(&ptree, info, keynr, record, 0, param)) + param->flags= 0; + if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root)) DBUG_RETURN(NULL); - DBUG_RETURN(maria_ft_linearize(&ptree)); + DBUG_RETURN(maria_ft_linearize(&ptree, mem_root)); } static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf, @@ -174,10 +175,6 @@ int _ma_ft_cmp(MARIA_HA *info, uint keynr, const byte *rec1, const byte *rec2) FT_SEG_ITERATOR ftsi1, ftsi2; CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; DBUG_ENTER("_ma_ft_cmp"); -#ifndef MYSQL_HAS_TRUE_CTYPE_IMPLEMENTATION - if (cs->mbmaxlen > 1) - DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); -#endif _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1); _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2); @@ -206,10 +203,11 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, int cmp, cmp2; DBUG_ENTER("_ma_ft_update"); - if (!(old_word=oldlist= _ma_ft_parserecord(info, keynr, oldrec))) - goto err0; - if (!(new_word=newlist= _ma_ft_parserecord(info, keynr, newrec))) - goto err1; + if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec, + &info->ft_memroot)) || + !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec, + &info->ft_memroot))) + goto err; error=0; while(old_word->pos && new_word->pos) @@ -222,13 +220,13 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, { key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos); if ((error= _ma_ck_delete(info,keynr,(uchar*) keybuf,key_length))) - goto err2; + goto err; } if (cmp > 0 || cmp2) { key_length= _ma_ft_make_key(info,keynr,keybuf,new_word,pos); if ((error= _ma_ck_write(info,keynr,(uchar*) keybuf,key_length))) - goto err2; + goto err; } if (cmp<=0) old_word++; if (cmp>=0) new_word++; @@ -238,11 +236,8 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, else if (new_word->pos) error= _ma_ft_store(info,keynr,keybuf,new_word,pos); -err2: - my_free((char*) newlist,MYF(0)); -err1: - my_free((char*) oldlist,MYF(0)); -err0: +err: + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); DBUG_RETURN(error); } @@ -255,12 +250,12 @@ int _ma_ft_add(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, int error= -1; FT_WORD *wlist; DBUG_ENTER("_ma_ft_add"); + DBUG_PRINT("enter",("keynr: %d",keynr)); - if ((wlist= _ma_ft_parserecord(info, keynr, record))) - { + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) error= _ma_ft_store(info,keynr,keybuf,wlist,pos); - my_free((char*) wlist,MYF(0)); - } + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_PRINT("exit",("Return: %d",error)); DBUG_RETURN(error); } @@ -275,11 +270,9 @@ int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, DBUG_ENTER("_ma_ft_del"); DBUG_PRINT("enter",("keynr: %d",keynr)); - if ((wlist= _ma_ft_parserecord(info, keynr, record))) - { + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) error= _ma_ft_erase(info,keynr,keybuf,wlist,pos); - my_free((char*) wlist,MYF(0)); - } + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); DBUG_PRINT("exit",("Return: %d",error)); DBUG_RETURN(error); } diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h index 41248d1bc9c..def7e92e6e0 100644 --- a/storage/maria/ma_ftdefs.h +++ b/storage/maria/ma_ftdefs.h @@ -24,12 +24,15 @@ #include #include -#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') +#define true_word_char(ctype, character) \ + ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \ + (character) == '_') #define misc_word_char(X) 0 -#define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) #define FT_MAX_WORD_LEN_FOR_SORT 31 +#define FTPARSER_MEMROOT_ALLOC_SIZE 65536 + #define COMPILE_STOPWORDS_IN /* Interested readers may consult SMART @@ -119,12 +122,12 @@ void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); uint _ma_ft_segiterator(FT_SEG_ITERATOR *); void maria_ft_parse_init(TREE *, CHARSET_INFO *); -int maria_ft_parse(TREE *, byte *, int, my_bool, struct st_mysql_ftparser *parser, - MYSQL_FTPARSER_PARAM *param); -FT_WORD * maria_ft_linearize(TREE *); -FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *); -uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *, my_bool, - MYSQL_FTPARSER_PARAM *param); +int maria_ft_parse(TREE *, byte *, int, struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); +FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *); +FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *, MEM_ROOT *); +uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *); FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *); @@ -145,5 +148,6 @@ float maria_ft_boolean_get_relevance(FT_INFO *); my_off_t maria_ft_boolean_get_docid(FT_INFO *); void maria_ft_boolean_reinit_search(FT_INFO *); extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, - uint keynr); + uint keynr, + uint paramnr); extern void maria_ftparser_call_deinitializer(MARIA_HA *info); diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index 6a8c647aa7f..ecd51f5dc92 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -127,7 +127,7 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, } if (keyseg->flag & HA_VAR_LENGTH_PART) { - uint pack_length= keyseg->bit_start; + uint pack_length= (keyseg->bit_start == 1 ? 1 : 2); uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos)); pos+= pack_length; /* Skip VARCHAR length */ @@ -509,20 +509,19 @@ int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, byte *buf) /* - Update auto_increment info + Retrieve auto_increment info SYNOPSIS - _ma_update_auto_increment() - info MARIA handler + retrieve_auto_increment() + info Maria handler record Row to update IMPLEMENTATION - Only replace the auto_increment value if it is higher than the previous - one. For signed columns we don't update the auto increment value if it's + For signed columns we don't retrieve the auto increment value if it's less than zero. */ -void _ma_update_auto_increment(MARIA_HA *info,const byte *record) +ulonglong ma_retrieve_auto_increment(MARIA_HA *info,const byte *record) { ulonglong value= 0; /* Store unsigned values here */ longlong s_value= 0; /* Store signed values here */ @@ -587,6 +586,5 @@ void _ma_update_auto_increment(MARIA_HA *info,const byte *record) and if s_value == 0 then value will contain either s_value or the correct value. */ - set_if_bigger(info->s->state.auto_increment, - (s_value > 0) ? (ulonglong) s_value : value); + return (s_value > 0) ? (ulonglong) s_value : value; } diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index c695cbb1a44..38e71a44f8b 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -95,7 +95,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) bzero((byte*) &info,sizeof(info)); my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, - MY_UNPACK_FILENAME|MY_APPEND_EXT),MYF(0)); + MY_UNPACK_FILENAME),MYF(0)); pthread_mutex_lock(&THR_LOCK_maria); if (!(old_info=_ma_test_if_reopen(name_buff))) { @@ -287,7 +287,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) &share->data_file_name,strlen(data_name)+1, &share->state.key_root,keys*sizeof(my_off_t), &share->state.key_del, - (share->state.header.max_block_size*sizeof(my_off_t)), + (share->state.header.max_block_size_index*sizeof(my_off_t)), #ifdef THREAD &share->key_root_lock,sizeof(rw_lock_t)*keys, #endif @@ -302,7 +302,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) (char*) key_root, sizeof(my_off_t)*keys); memcpy((char*) share->state.key_del, (char*) key_del, (sizeof(my_off_t) * - share->state.header.max_block_size)); + share->state.header.max_block_size_index)); strmov(share->unique_file_name, name_buff); share->unique_name_length= strlen(name_buff); strmov(share->index_file_name, index_name); @@ -799,7 +799,7 @@ uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; uchar *ptr=buff; uint i, keys= (uint) state->header.keys, - key_blocks=state->header.max_block_size; + key_blocks=state->header.max_block_size_index; DBUG_ENTER("_ma_state_info_write"); memcpy_fixed(ptr,&state->header,sizeof(state->header)); @@ -865,7 +865,7 @@ uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state) ptr +=sizeof(state->header); keys=(uint) state->header.keys; key_parts=mi_uint2korr(state->header.key_parts); - key_blocks=state->header.max_block_size; + key_blocks=state->header.max_block_size_index; state->open_count = mi_uint2korr(ptr); ptr +=2; state->changed= (bool) *ptr++; @@ -1038,7 +1038,7 @@ char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef) keydef->keylength = mi_uint2korr(ptr); ptr +=2; keydef->minlength = mi_uint2korr(ptr); ptr +=2; keydef->maxlength = mi_uint2korr(ptr); ptr +=2; - keydef->block_size = keydef->block_length/MARIA_MIN_KEY_BLOCK_LENGTH-1; + keydef->block_size_index = keydef->block_length/MARIA_MIN_KEY_BLOCK_LENGTH-1; keydef->underflow_block_length=keydef->block_length/3; keydef->version = 0; /* Not saved */ keydef->parser = &ft_default_parser; diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index 78864e2c9ac..054b8e16468 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -112,8 +112,8 @@ int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, DBUG_ENTER("_ma_dispose"); DBUG_PRINT("enter",("pos: %ld", (long) pos)); - old_link=info->s->state.key_del[keyinfo->block_size]; - info->s->state.key_del[keyinfo->block_size]=pos; + old_link= info->s->state.key_del[keyinfo->block_size_index]; + info->s->state.key_del[keyinfo->block_size_index]= pos; mi_sizestore(buff,old_link); info->s->state.changed|= STATE_NOT_SORTED_PAGES; DBUG_RETURN(key_cache_write(info->s->key_cache, @@ -132,7 +132,8 @@ my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) char buff[8]; DBUG_ENTER("_ma_new"); - if ((pos=info->s->state.key_del[keyinfo->block_size]) == HA_OFFSET_ERROR) + if ((pos= info->s->state.key_del[keyinfo->block_size_index]) == + HA_OFFSET_ERROR) { if (info->state->key_file_length >= info->s->base.max_key_file_length - keyinfo->block_length) @@ -152,7 +153,7 @@ my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) (uint) keyinfo->block_length,0)) pos= HA_OFFSET_ERROR; else - info->s->state.key_del[keyinfo->block_size]=mi_sizekorr(buff); + info->s->state.key_del[keyinfo->block_size_index]= mi_sizekorr(buff); } info->s->state.changed|= STATE_NOT_SORTED_PAGES; DBUG_PRINT("exit",("Pos: %ld",(long) pos)); diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index abcce1a2582..2cb54a73b15 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -92,7 +92,18 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len if (!_ma_search(info, keyinfo, key_buff, use_key_length, maria_read_vec[search_flag], info->s->state.key_root[inx])) { - while (info->lastpos >= info->state->data_file_length) + /* + If we are searching for an exact key (including the data pointer) + and this was added by an concurrent insert, + then the result is "key not found". + */ + if ((search_flag == HA_READ_KEY_EXACT) && + (info->lastpos >= info->state->data_file_length)) + { + my_errno= HA_ERR_KEY_NOT_FOUND; + info->lastpos= HA_OFFSET_ERROR; + } + else while (info->lastpos >= info->state->data_file_length) { /* Skip rows that are inserted by other threads since we got a lock @@ -101,9 +112,9 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len */ if (_ma_search_next(info, keyinfo, info->lastkey, - info->lastkey_length, - maria_readnext_vec[search_flag], - info->s->state.key_root[inx])) + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) break; } } diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c index e30d41dd46c..09861c03c32 100644 --- a/storage/maria/ma_rsamepos.c +++ b/storage/maria/ma_rsamepos.c @@ -31,8 +31,9 @@ int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, my_off_t filepos) { DBUG_ENTER("maria_rsame_with_pos"); + DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); - if (inx < -1 || ! maria_is_key_active(info->s->state.key_map, inx)) + if (inx < -1 || (inx >= 0 && !maria_is_key_active(info->s->state.key_map, inx))) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c index ff10ae72027..83ced5b8167 100644 --- a/storage/maria/ma_rt_index.c +++ b/storage/maria/ma_rt_index.c @@ -183,9 +183,11 @@ int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_leng return -1; } - /* Save searched key */ - memcpy(info->first_mbr_key, key, keyinfo->keylength - - info->s->base.rec_reflength); + /* + Save searched key, include data pointer. + The data pointer is required if the search_flag contains MBR_DATA. + */ + memcpy(info->first_mbr_key, key, keyinfo->keylength); info->last_rkey_length = key_length; info->maria_rtree_recursion_depth = -1; diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c index 83d3a0a2f1c..67b1d59f505 100644 --- a/storage/maria/ma_rt_mbr.c +++ b/storage/maria/ma_rt_mbr.c @@ -52,10 +52,14 @@ if (EQUAL_CMP(amin, amax, bmin, bmax)) \ return 1; \ } \ - else /* if (nextflag & MBR_DISJOINT) */ \ + else if (nextflag & MBR_DISJOINT) \ { \ if (DISJOINT_CMP(amin, amax, bmin, bmax)) \ return 1; \ + }\ + else /* if unknown comparison operator */ \ + { \ + DBUG_ASSERT(0); \ } #define RT_CMP_KORR(type, korr_func, len, nextflag) \ diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c index 3bb048ea239..af25be06a09 100644 --- a/storage/maria/ma_search.c +++ b/storage/maria/ma_search.c @@ -259,15 +259,16 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, { maria_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; - DBUG_PRINT("error",("Found wrong key: length: %u page: %lx end: %lx", - length, (long) page, (long) end)); + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: 0x%lx", + length, (long) page, (long) end)); DBUG_RETURN(MARIA_FOUND_WRONG_KEY); } if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag, not_used)) >= 0) break; #ifdef EXTRA_DEBUG - DBUG_PRINT("loop",("page: %lx key: '%s' flag: %d", (long) page, t_buff, + DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff, flag)); #endif memcpy(buff,t_buff,length); @@ -276,7 +277,7 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, if (flag == 0) memcpy(buff,t_buff,length); /* Result is first key */ *last_key= page == end; - DBUG_PRINT("exit",("flag: %d ret_pos: %lx", flag, (long) *ret_pos)); + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); DBUG_RETURN(flag); } /* _ma_seq_search */ @@ -416,8 +417,9 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag { maria_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; - DBUG_PRINT("error",("Found wrong key: length: %u page: %lx end: %lx", - length, (long) page, (long) end)); + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: %lx", + length, (long) page, (long) end)); DBUG_RETURN(MARIA_FOUND_WRONG_KEY); } @@ -551,7 +553,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag *last_key= page == end; - DBUG_PRINT("exit",("flag: %d ret_pos: %lx", flag, (long) *ret_pos)); + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); DBUG_RETURN(flag); } /* _ma_prefix_search */ @@ -813,7 +815,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, if (length > keyseg->length) { DBUG_PRINT("error", - ("Found too long null packed key: %u of %u at %lx", + ("Found too long null packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); maria_print_error(keyinfo->share, HA_ERR_CRASHED); @@ -870,7 +872,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, } if (length > (uint) keyseg->length) { - DBUG_PRINT("error",("Found too long packed key: %u of %u at %lx", + DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); maria_print_error(keyinfo->share, HA_ERR_CRASHED); @@ -936,8 +938,9 @@ uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, { if (length > keyinfo->maxlength) { - DBUG_PRINT("error",("Found too long binary packed key: %u of %u at %lx", - length, keyinfo->maxlength, (long) *page_pos)); + DBUG_PRINT("error", + ("Found too long binary packed key: %u of %u at 0x%lx", + length, keyinfo->maxlength, (long) *page_pos)); DBUG_DUMP("key",(char*) *page_pos,16); maria_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; @@ -984,7 +987,7 @@ uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, length-=tmp; from=page; from_end=page_end; } - DBUG_PRINT("info",("key: %lx from: %lx length: %u", + DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u", (long) key, (long) from, length)); memmove((byte*) key, (byte*) from, (size_t) length); key+=length; @@ -1042,7 +1045,7 @@ uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, } } } - DBUG_PRINT("exit",("page: %lx length: %u", (long) page, + DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page, *return_key_length)); DBUG_RETURN(page); } /* _ma_get_key */ @@ -1095,7 +1098,8 @@ uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, uint nod_flag; uchar *lastpos; DBUG_ENTER("_ma_get_last_key"); - DBUG_PRINT("enter",("page: %lx endpos: %lx", (long) page, (long) endpos)); + DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page, + (long) endpos)); nod_flag=_ma_test_if_nod(page); if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) @@ -1115,7 +1119,7 @@ uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey); if (*return_key_length == 0) { - DBUG_PRINT("error",("Couldn't find last key: page: %lx", + DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", (long) page)); maria_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; @@ -1123,7 +1127,7 @@ uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, } } } - DBUG_PRINT("exit",("lastpos: %lx length: %u", (long) lastpos, + DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos, *return_key_length)); DBUG_RETURN(lastpos); } /* _ma_get_last_key */ @@ -1472,7 +1476,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, if (!*key++) { s_temp->key=key; - s_temp->ref_length=s_temp->key_length=0; + s_temp->key_length=0; s_temp->totlength=key_length-1+diff_flag; s_temp->next_key_pos=0; /* No next key */ return (s_temp->totlength); @@ -1627,12 +1631,12 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, s_temp->prev_length= org_key_length; s_temp->n_ref_length=s_temp->n_length= org_key_length; length+= org_key_length; - /* +get_pack_length(org_key_length); */ } return (int) length; } ref_length=n_length; + /* Get information about not packed key suffix */ get_key_pack_length(n_length,next_length_pack,next_key); /* Test if new keys has fewer characters that match the previous key */ @@ -1641,7 +1645,6 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, s_temp->part_of_prev_key= 0; s_temp->prev_length= ref_length; s_temp->n_ref_length= s_temp->n_length= n_length+ref_length; - /* s_temp->prev_key+= get_pack_length(org_key_length); */ return (int) length+ref_length-next_length_pack; } if (ref_length+pack_marker > new_ref_length) @@ -1652,9 +1655,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, s_temp->prev_length= ref_length - new_pack_length; s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length; s_temp->prev_key+= new_pack_length; -/* +get_pack_length(org_key_length); */ - length= length-get_pack_length(ref_length)+ - get_pack_length(new_pack_length); + length-= (next_length_pack - get_pack_length(s_temp->n_length)); return (int) length + s_temp->prev_length; } } @@ -1664,7 +1665,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, ref_length=0; next_length_pack=0; } - DBUG_PRINT("test",("length: %d next_key: %lx", length, + DBUG_PRINT("test",("length: %d next_key: 0x%lx", length, (long) next_key)); { diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index df547792ffa..5ae23c37261 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -442,6 +442,7 @@ err: close_cached_file(&info->tempfile_for_exceptions); ok: + free_root(&info->wordroot, MYF(0)); remove_io_thread(&info->read_cache); pthread_mutex_lock(&info->sort_info->mutex); info->sort_info->threads_running--; diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c index 621a6d90e65..69d432a5d95 100644 --- a/storage/maria/ma_test1.c +++ b/storage/maria/ma_test1.c @@ -96,6 +96,7 @@ static int run_test(const char *filename) /* Define a key over the first column */ keyinfo[0].seg=keyseg; keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].seg[0].type= key_type; keyinfo[0].seg[0].flag= pack_seg; diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 7dd1ffeb7fd..840ecb2eeb7 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -96,6 +96,7 @@ int main(int argc, char *argv[]) keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].keysegs=1; keyinfo[0].flag = pack_type; + keyinfo[0].block_length= 0; /* Default block length */ keyinfo[1].seg= &glob_keyseg[1][0]; keyinfo[1].seg[0].start=7; keyinfo[1].seg[0].length=6; @@ -112,6 +113,7 @@ int main(int argc, char *argv[]) keyinfo[1].key_alg=HA_KEY_ALG_BTREE; keyinfo[1].keysegs=2; keyinfo[1].flag =0; + keyinfo[1].block_length= MARIA_MIN_KEY_BLOCK_LENGTH; /* Diff blocklength */ keyinfo[2].seg= &glob_keyseg[2][0]; keyinfo[2].seg[0].start=12; keyinfo[2].seg[0].length=8; @@ -122,6 +124,7 @@ int main(int argc, char *argv[]) keyinfo[2].key_alg=HA_KEY_ALG_BTREE; keyinfo[2].keysegs=1; keyinfo[2].flag =HA_NOSAME; + keyinfo[2].block_length= 0; /* Default block length */ keyinfo[3].seg= &glob_keyseg[3][0]; keyinfo[3].seg[0].start=0; keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0); @@ -133,6 +136,7 @@ int main(int argc, char *argv[]) keyinfo[3].key_alg=HA_KEY_ALG_BTREE; keyinfo[3].keysegs=1; keyinfo[3].flag = pack_type; + keyinfo[3].block_length= 0; /* Default block length */ keyinfo[4].seg= &glob_keyseg[4][0]; keyinfo[4].seg[0].start=0; keyinfo[4].seg[0].length=5; @@ -144,6 +148,7 @@ int main(int argc, char *argv[]) keyinfo[4].key_alg=HA_KEY_ALG_BTREE; keyinfo[4].keysegs=1; keyinfo[4].flag = pack_type; + keyinfo[4].block_length= 0; /* Default block length */ keyinfo[5].seg= &glob_keyseg[5][0]; keyinfo[5].seg[0].start=0; keyinfo[5].seg[0].length=4; @@ -155,6 +160,7 @@ int main(int argc, char *argv[]) keyinfo[5].key_alg=HA_KEY_ALG_BTREE; keyinfo[5].keysegs=1; keyinfo[5].flag = pack_type; + keyinfo[5].block_length= 0; /* Default block length */ recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; recinfo[0].length=7; @@ -701,7 +707,7 @@ int main(int argc, char *argv[]) if (!silent) printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n"); - if (maria_extra(file,HA_EXTRA_RESET,0) || maria_extra(file,HA_EXTRA_CACHE,0)) + if (maria_reset(file) || maria_extra(file,HA_EXTRA_CACHE,0)) { if (locking || (!use_blob && !pack_fields)) { @@ -744,7 +750,7 @@ int main(int argc, char *argv[]) DBUG_PRINT("progpos",("Removing keys")); lastpos = HA_OFFSET_ERROR; /* DBUG_POP(); */ - maria_extra(file,HA_EXTRA_RESET,0); + maria_reset(file); found_parts=0; while ((error=maria_rrnd(file,read_record,HA_OFFSET_ERROR)) != HA_ERR_END_OF_FILE) @@ -911,13 +917,13 @@ static void get_options(int argc, char **argv) } break; case 'e': /* maria_block_length */ - if ((maria_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + if ((maria_block_size= atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) { fprintf(stderr,"Wrong maria_block_length\n"); exit(1); } - maria_block_size=1 << my_bit_log2(maria_block_size); + maria_block_size= my_round_up_to_next_power(maria_block_size); break; case 'E': /* maria_block_length */ if ((key_cache_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || @@ -926,7 +932,7 @@ static void get_options(int argc, char **argv) fprintf(stderr,"Wrong key_cache_block_size\n"); exit(1); } - key_cache_block_size=1 << my_bit_log2(key_cache_block_size); + key_cache_block_size= my_round_up_to_next_power(key_cache_block_size); break; case 'f': if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS) diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c index d2fe8d90cf6..96b896b03c6 100644 --- a/storage/maria/ma_test3.c +++ b/storage/maria/ma_test3.c @@ -77,6 +77,7 @@ int main(int argc,char **argv) keyinfo[0].key_alg=HA_KEY_ALG_BTREE; keyinfo[0].keysegs=1; keyinfo[0].flag = (uint8) HA_PACK_KEY; + keyinfo[0].block_length= 0; /* Default block length */ keyinfo[1].seg= &keyseg[1][0]; keyinfo[1].seg[0].start=8; keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */ @@ -85,6 +86,7 @@ int main(int argc,char **argv) keyinfo[1].key_alg=HA_KEY_ALG_BTREE; keyinfo[1].keysegs=1; keyinfo[1].flag =HA_NOSAME; + keyinfo[1].block_length= 0; /* Default block length */ recinfo[0].type=0; recinfo[0].length=sizeof(record.id); diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 3f1ba4b1fac..248b17ce2c9 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -164,7 +164,8 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) key_changed|= HA_STATE_CHANGED; /* Must update index file */ } if (auto_key_changed) - _ma_update_auto_increment(info,newrec); + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, newrec)); if (share->calc_checksum) info->state->checksum+=(info->checksum - old_checksum); diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 5a1a540b88b..24768b36c89 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -145,7 +145,8 @@ int maria_write(MARIA_HA *info, byte *record) info->state->checksum+=info->checksum; } if (share->base.auto_key) - _ma_update_auto_increment(info,record); + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, record)); info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); info->state->records++; diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 7f48a0805da..e423a3f5c36 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -34,10 +34,6 @@ SET_STACK_SIZE(9000) /* Minimum stack size for program */ #define my_raid_delete(A,B,C) my_delete(A,B) #endif -#ifdef OS2 -#define _sanity(a,b) -#endif - static uint decode_bits; static char **default_argv; static const char *load_default_groups[]= { "mariachk", 0 }; @@ -92,10 +88,6 @@ int main(int argc, char **argv) MY_INIT(argv[0]); my_progname_short= my_progname+dirname_length(my_progname); -#ifdef __EMX__ - _wildcard (&argc, &argv); -#endif - mariachk_init(&check_param); check_param.opt_lock_memory= 1; /* Lock memory if possible */ check_param.using_global_keycache = 0; @@ -381,7 +373,7 @@ static void usage(void) directly with '--variable-name=value'.\n\ -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\ specified, separated by "); -#if defined( __WIN__) || defined(OS2) || defined(__NETWARE__) +#if defined( __WIN__) || defined(__NETWARE__) printf("semicolon (;)"); #else printf("colon (:)"); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 9bab65126c7..ecd93807a06 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -55,7 +55,7 @@ typedef struct st_maria_state_info uchar keys; /* number of keys in file */ uchar uniques; /* number of UNIQUE definitions */ uchar language; /* Language for indexes */ - uchar max_block_size; /* max keyblock size */ + uchar max_block_size_index; /* max keyblock size */ uchar fulltext_keys; uchar not_used; /* To align to 8 */ } header; @@ -246,6 +246,7 @@ struct st_maria_info /* accumulate indexfile changes between write's */ TREE *bulk_insert; DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ + MEM_ROOT ft_memroot; /* used by the parser */ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ char *filename; /* parameter to open filename */ uchar *buff, /* Temp area for key */ @@ -398,7 +399,7 @@ struct st_maria_info #define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ #define MARIA_MAX_KEY_BLOCK_SIZE (MARIA_MAX_KEY_BLOCK_LENGTH/MARIA_MIN_KEY_BLOCK_LENGTH) -#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/maria_block_size+1)*maria_block_size) +#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) #define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ #define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ @@ -572,7 +573,7 @@ extern int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, byte *buf); extern int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, uint length, int re_read_if_possibly); -extern void _ma_update_auto_increment(MARIA_HA *info, const byte *record); +extern ulonglong ma_retrieve_auto_increment(MARIA_HA *info, const byte *record); extern byte *_ma_alloc_rec_buff(MARIA_HA *, ulong, byte **); #define _ma_get_rec_buff_ptr(info,buf) \ diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c index eb5dec5aa3b..b840072aed0 100644 --- a/storage/maria/maria_ftdump.c +++ b/storage/maria/maria_ftdump.c @@ -34,20 +34,20 @@ static uint lengths[256]; static struct my_option my_long_options[] = { + {"help", 'h', "Display help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', "Synonym for -h.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Calculate per-word stats (counts and global weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"dump", 'd', "Dump index (incl. data offsets and word weights).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"length", 'l', "Report length distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"stats", 's', "Report global stats.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose", 'v', "Be verbose.", (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"count", 'c', "Calculate per-word stats (counts and global weights).", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"length", 'l', "Report length distribution.", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"help", 'h', "Display help and exit.", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"help", '?', "Synonym for -h.", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -87,7 +87,8 @@ int main(int argc,char *argv[]) init_key_cache(maria_key_cache,MARIA_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); - if (!(info=maria_open(argv[0],2,HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) + if (!(info=maria_open(argv[0], O_RDONLY, + HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) { error=my_errno; goto err; diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 514d8ba0bf5..c5a53b1ffac 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -2968,7 +2968,7 @@ static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length maria_clear_all_keys_active(share->state.key_map); for (key=0 ; key < share->base.keys ; key++) share->state.key_root[key]= HA_OFFSET_ERROR; - for (key=0 ; key < share->state.header.max_block_size ; key++) + for (key=0 ; key < share->state.header.max_block_size_index ; key++) share->state.key_del[key]= HA_OFFSET_ERROR; isam_file->state->checksum=crc; /* Save crc here */ share->changed=1; /* Force write of header */ @@ -3035,7 +3035,7 @@ static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) { isam_info= *(info->current=info->file); info->end=info->current+info->count; - maria_extra(isam_info, HA_EXTRA_RESET, 0); + maria_reset(isam_info); maria_extra(isam_info, HA_EXTRA_CACHE, 0); filepos=isam_info->s->pack.header_length; } @@ -3058,7 +3058,7 @@ static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) info->current++; isam_info= *info->current; filepos=isam_info->s->pack.header_length; - maria_extra(isam_info,HA_EXTRA_RESET, 0); + maria_reset(isam_info); maria_extra(isam_info,HA_EXTRA_CACHE, 0); } } From e8524352dc79e9231fd2c2d835bf0e479ba6e77a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Aug 2006 17:24:50 +0200 Subject: [PATCH 15/95] adding handler flag missing from previous merge, for Maria sql/ha_maria.cc: flag missing from previous merge --- sql/ha_maria.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc index aa244450bb5..29718f1493e 100644 --- a/sql/ha_maria.cc +++ b/sql/ha_maria.cc @@ -155,7 +155,7 @@ handler(&maria_hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS | - HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1) {} From cd876fb11883f68f93027a70b5f3f99ad9234f27 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Aug 2006 19:19:47 +0200 Subject: [PATCH 16/95] amd64 atomic ops lock-free alloc (WL#3229), lock-free hash (WL#3230) bit functions made inline include/Makefile.am: lf.h added mysys/Makefile.am: lf_hash.c lf_dynarray.c lf_alloc-pin.c include/atomic/nolock.h: amd64 atomic ops include/atomic/rwlock.h: s/rw_lock/mutex/g include/atomic/x86-gcc.h: amd64 atomic ops try PAUSE include/my_global.h: STATIC_INLINE mysys/mf_keycache.c: make bit functions inline mysys/my_atomic.c: STATIC_INLINE mysys/my_bitmap.c: make bit functions inline sql/ha_myisam.cc: make bit functions inline sql/item_func.cc: make bit functions inline include/my_atomic.h: STATIC_INLINE mysys/my_bit.c: make bit functions inline sql/sql_select.cc: make bit functions inline storage/myisam/mi_create.c: make bit functions inline storage/myisam/mi_test2.c: make bit functions inline storage/myisam/myisamchk.c: make bit functions inline mysys/my_init.c: thread_size moved to mysys sql/mysql_priv.h: thread_size moved to mysys sql/set_var.cc: thread_size moved to mysys include/my_sys.h: thread_size moved to mysys sql/mysqld.cc: thread_size moved to mysys sql/sql_parse.cc: thread_size moved to mysys sql/sql_test.cc: thread_size moved to mysys include/lf.h: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_alloc-pin.c: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_dynarray.c: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_hash.c: dylf_dynarray refactored to remove 65536 elements limit unittest/mysys/my_atomic-t.c: fix to commit (remove debug code) --- include/Makefile.am | 2 +- include/atomic/nolock.h | 23 +-- include/atomic/rwlock.h | 21 +- include/atomic/x86-gcc.h | 19 +- include/lf.h | 238 ++++++++++++++++++++++ include/my_atomic.h | 14 +- include/my_bit.h | 107 ++++++++++ include/my_global.h | 2 + include/my_sys.h | 7 +- mysys/Makefile.am | 3 +- mysys/lf_alloc-pin.c | 319 ++++++++++++++++++++++++++++++ mysys/lf_dynarray.c | 186 ++++++++++++++++++ mysys/lf_hash.c | 370 +++++++++++++++++++++++++++++++++++ mysys/mf_keycache.c | 1 + mysys/my_atomic.c | 7 +- mysys/my_bit.c | 100 +++------- mysys/my_bitmap.c | 1 + mysys/my_init.c | 1 + sql/ha_myisam.cc | 1 + sql/item_func.cc | 1 + sql/mysql_priv.h | 2 +- sql/mysqld.cc | 25 +-- sql/set_var.cc | 4 +- sql/sql_parse.cc | 4 +- sql/sql_select.cc | 1 + sql/sql_test.cc | 4 +- storage/myisam/mi_create.c | 3 +- storage/myisam/mi_test2.c | 1 + storage/myisam/myisamchk.c | 1 + unittest/mysys/my_atomic-t.c | 190 +++++++++++++++--- 30 files changed, 1506 insertions(+), 152 deletions(-) create mode 100644 include/lf.h create mode 100644 include/my_bit.h create mode 100644 mysys/lf_alloc-pin.c create mode 100644 mysys/lf_dynarray.c create mode 100644 mysys/lf_hash.c diff --git a/include/Makefile.am b/include/Makefile.am index 26161b36ab8..fe0ac1be31a 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -24,7 +24,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \ sslopt-vars.h sslopt-case.h sql_common.h keycache.h \ mysql_time.h mysql/plugin.h $(BUILT_SOURCES) noinst_HEADERS = config-win.h config-netware.h \ - heap.h my_bitmap.h my_uctype.h \ + heap.h my_bitmap.h my_uctype.h lf.h \ myisam.h myisampack.h myisammrg.h ft_global.h\ mysys_err.h my_base.h help_start.h help_end.h \ my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \ diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index 1151a334b06..21f41484ac9 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -14,19 +14,20 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#if defined(__i386__) || defined(_M_IX86) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) -#ifdef MY_ATOMIC_MODE_DUMMY -# define LOCK "" -#else -# define LOCK "lock" -#endif +# ifdef MY_ATOMIC_MODE_DUMMY +# define LOCK "" +# else +# define LOCK "lock" +# endif -#ifdef __GNUC__ -#include "x86-gcc.h" -#elif defined(_MSC_VER) -#include "x86-msvc.h" -#endif +# ifdef __GNUC__ +# include "x86-gcc.h" +# elif defined(_MSC_VER) +# error Broken! +# include "x86-msvc.h" +# endif #endif #ifdef make_atomic_cas_body diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h index 12d0dd3f069..3d8edb9e27e 100644 --- a/include/atomic/rwlock.h +++ b/include/atomic/rwlock.h @@ -14,7 +14,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; +typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t; #ifdef MY_ATOMIC_MODE_DUMMY /* @@ -32,13 +32,18 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; #define my_atomic_rwlock_wrunlock(name) #define MY_ATOMIC_MODE "dummy (non-atomic)" #else -#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw) -#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0) -#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw) -#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw) -#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define MY_ATOMIC_MODE "rwlocks" +/* + we're using read-write lock macros but map them to mutex locks, and they're + faster. Still, having semantically rich API we can change the + underlying implementation, if necessary. +*/ +#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw) +#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0) +#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw) +#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw) +#define MY_ATOMIC_MODE "mutex" #endif #define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav; diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index 5a72f318a61..3f0a82a9400 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -20,10 +20,18 @@ architectures support double-word (128-bit) cas. */ -#ifdef MY_ATOMIC_NO_XADD -#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +#ifdef __x86_64__ +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-amd64" LOCK "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-amd64" LOCK +# endif #else -#define MY_ATOMIC_MODE "gcc-x86" LOCK +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-x86" LOCK +# endif #endif /* fix -ansi errors while maintaining readability */ @@ -54,6 +62,9 @@ asm volatile (LOCK "; cmpxchg %2, %0" \ : "+m" (*a), "+a" (ret): "r" (ret)) #define make_atomic_store_body(S) \ - asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v)) + asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) #endif +/* TODO test on intel whether the below helps. on AMD it makes no difference */ +//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; }) + diff --git a/include/lf.h b/include/lf.h new file mode 100644 index 00000000000..299b4e07aa5 --- /dev/null +++ b/include/lf.h @@ -0,0 +1,238 @@ + +/* + TODO + 1. copyright + 6. reduce the number of memory barriers +*/ + +#ifndef _lf_h +#define _lf_h + +#include + +/* + Generic helpers +*/ + +#define lock_wrap(f,t,proto_args, args, lock) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + t ret; \ + my_atomic_rwlock_wrlock(lock); \ + ret= _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ + return ret; \ +} + +#define lock_wrap_void(f,proto_args, args, lock) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + my_atomic_rwlock_wrlock(lock); \ + _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ +} + +#define nolock_wrap(f,t,proto_args, args) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + return _ ## f args; \ +} + +#define nolock_wrap_void(f,proto_args, args) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + _ ## f args; \ +} + +/* + dynamic array + + 4 levels of 256 elements each mean 4311810304 elements in an array - it + should be enough for a while +*/ +#define LF_DYNARRAY_LEVEL_LENGTH 256 +#define LF_DYNARRAY_LEVELS 4 + +typedef struct { + void * volatile level[LF_DYNARRAY_LEVELS]; + uint size_of_element; + my_atomic_rwlock_t lock; +} LF_DYNARRAY; + +typedef int (*lf_dynarray_func)(void *, void *); + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size); +void lf_dynarray_end(LF_DYNARRAY *array); + +nolock_wrap(lf_dynarray_nr, int, + (LF_DYNARRAY *array, void *el), + (array,el)); + +nolock_wrap(lf_dynarray_value, void *, + (LF_DYNARRAY *array, uint idx), + (array,idx)); + +lock_wrap(lf_dynarray_lvalue, void *, + (LF_DYNARRAY *array, uint idx), + (array,idx), + &array->lock); +nolock_wrap(lf_dynarray_iterate, int, + (LF_DYNARRAY *array, lf_dynarray_func func, void *arg), + (array,func,arg)); + +/* + pin manager for memory allocator +*/ + +#define LF_PINBOX_PINS 3 +#define LF_PURGATORY_SIZE 11 + +typedef void lf_pinbox_free_func(void *, void *); + +typedef struct { + LF_DYNARRAY pinstack; + lf_pinbox_free_func *free_func; + void * free_func_arg; + uint32 volatile pinstack_top_ver; /* this is a versioned pointer */ + uint32 volatile pins_in_stack; /* number of elements in array */ +} LF_PINBOX; + +/* we want sizeof(LF_PINS) to be close to 128 to avoid false sharing */ +typedef struct { + void * volatile pin[LF_PINBOX_PINS]; + void * purgatory[LF_PURGATORY_SIZE]; + LF_PINBOX *pinbox; + uint32 purgatory_count; + uint32 volatile link; + char pad[128-sizeof(uint32)*2 + -sizeof(void *)*(LF_PINBOX_PINS+LF_PURGATORY_SIZE+1)]; +} LF_PINS; + +#define lf_lock_by_pins(PINS) \ + my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock) +#define lf_unlock_by_pins(PINS) \ + my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinstack.lock) + +/* + compile-time assert, to require "no less than N" pins + it's enough if it'll fail on at least one compiler, so + we'll enable it on GCC only, which supports zero-length arrays. +*/ +#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) +#define LF_REQUIRE_PINS(N) \ + static const char require_pins[LF_PINBOX_PINS-N]; \ + static const int LF_NUM_PINS_IN_THIS_FILE=N; +#define _lf_pin(PINS, PIN, ADDR) \ + ( \ + my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)), \ + assert(PIN < LF_NUM_PINS_IN_THIS_FILE) \ + ) +#else +#define LF_REQUIRE_PINS(N) +#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) +#endif + +#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL) +#define lf_pin(PINS, PIN, ADDR) \ + do { \ + lf_lock_pins(PINS); \ + _lf_pin(PINS, PIN, ADDR); \ + lf_unlock_pins(PINS); \ + } while (0) +#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) + +void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, + void * free_func_arg); +void lf_pinbox_end(LF_PINBOX *pinbox); + +lock_wrap(lf_pinbox_get_pins, LF_PINS *, + (LF_PINBOX *pinbox), + (pinbox), + &pinbox->pinstack.lock); +lock_wrap_void(lf_pinbox_put_pins, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinstack.lock); +#if 0 +lock_wrap_void(lf_pinbox_real_free, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinstack.lock); +#endif +lock_wrap_void(lf_pinbox_free, + (LF_PINS *pins, void *addr), + (pins,addr), + &pins->pinbox->pinstack.lock); + +/* + memory allocator +*/ + +typedef struct st_lf_allocator { + LF_PINBOX pinbox; + void * volatile top; + uint element_size; + uint32 volatile mallocs; +} LF_ALLOCATOR; + +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size); +void lf_alloc_end(LF_ALLOCATOR *allocator); +uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); +#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) +#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) +#define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox) +#define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox) +#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) +#define lf_alloc_real_free(ALLOC,ADDR) my_free((gptr)(ADDR), MYF(0)) + +lock_wrap(lf_alloc_new, void *, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinstack.lock); + +/* + extendible hash +*/ +#include + +#define LF_HASH_UNIQUE 1 + +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + hash_get_key get_key; /* see HASH */ + CHARSET_INFO *charset; /* see HASH */ + uint key_offset, key_length; /* see HASH */ + uint element_size, flags; /* LF_HASH_UNIQUE, etc */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ +} LF_HASH; + +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset); +void lf_hash_end(LF_HASH *hash); +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); +int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen); +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen); +#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc) +#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc) +#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS) + +/* + cleanup +*/ + +#undef lock_wrap_void +#undef lock_wrap +#undef nolock_wrap_void +#undef nolock_wrap + +#endif + diff --git a/include/my_atomic.h b/include/my_atomic.h index 9a319f84451..d3e4e0055d3 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -36,7 +36,7 @@ #ifdef HAVE_INLINE #define make_atomic_add(S) \ -static inline int ## S my_atomic_add ## S( \ +STATIC_INLINE int ## S my_atomic_add ## S( \ int ## S volatile *a, int ## S v) \ { \ make_atomic_add_body(S); \ @@ -44,7 +44,7 @@ static inline int ## S my_atomic_add ## S( \ } #define make_atomic_swap(S) \ -static inline int ## S my_atomic_swap ## S( \ +STATIC_INLINE int ## S my_atomic_swap ## S( \ int ## S volatile *a, int ## S v) \ { \ make_atomic_swap_body(S); \ @@ -52,7 +52,7 @@ static inline int ## S my_atomic_swap ## S( \ } #define make_atomic_cas(S) \ -static inline int my_atomic_cas ## S(int ## S volatile *a, \ +STATIC_INLINE int my_atomic_cas ## S(int ## S volatile *a, \ int ## S *cmp, int ## S set) \ { \ int8 ret; \ @@ -61,7 +61,7 @@ static inline int my_atomic_cas ## S(int ## S volatile *a, \ } #define make_atomic_load(S) \ -static inline int ## S my_atomic_load ## S(int ## S volatile *a) \ +STATIC_INLINE int ## S my_atomic_load ## S(int ## S volatile *a) \ { \ int ## S ret; \ make_atomic_load_body(S); \ @@ -69,7 +69,7 @@ static inline int ## S my_atomic_load ## S(int ## S volatile *a) \ } #define make_atomic_store(S) \ -static inline void my_atomic_store ## S( \ +STATIC_INLINE void my_atomic_store ## S( \ int ## S volatile *a, int ## S v) \ { \ make_atomic_store_body(S); \ @@ -135,6 +135,10 @@ make_atomic_swap(ptr) #undef _atomic_h_cleanup_ #endif +#ifndef LF_BACKOFF +#define LF_BACKOFF (1) +#endif + #if SIZEOF_CHARP == SIZEOF_INT typedef int intptr; #elif SIZEOF_CHARP == SIZEOF_LONG diff --git a/include/my_bit.h b/include/my_bit.h new file mode 100644 index 00000000000..71bbe2d4ba3 --- /dev/null +++ b/include/my_bit.h @@ -0,0 +1,107 @@ +/* + Some useful bit functions +*/ + +#ifdef HAVE_INLINE + +extern const char _my_bits_nbits[256]; +extern const uchar _my_bits_reverse_table[256]; + +/* + Find smallest X in 2^X >= value + This can be used to divide a number with value by doing a shift instead +*/ + +STATIC_INLINE uint my_bit_log2(ulong value) +{ + uint bit; + for (bit=0 ; value > 1 ; value>>=1, bit++) ; + return bit; +} + +STATIC_INLINE uint my_count_bits(ulonglong v) +{ +#if SIZEOF_LONG_LONG > 4 + /* The following code is a bit faster on 16 bit machines than if we would + only shift v */ + ulong v2=(ulong) (v >> 32); + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)] + + _my_bits_nbits[(uchar) (v2)] + + _my_bits_nbits[(uchar) (v2 >> 8)] + + _my_bits_nbits[(uchar) (v2 >> 16)] + + _my_bits_nbits[(uchar) (v2 >> 24)]); +#else + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)]); +#endif +} + +STATIC_INLINE uint my_count_bits_ushort(ushort v) +{ + return _my_bits_nbits[v]; +} + + +/* + Next highest power of two + + SYNOPSIS + my_round_up_to_next_power() + v Value to check + + RETURN + Next or equal power of 2 + Note: 0 will return 0 + + NOTES + Algorithm by Sean Anderson, according to: + http://graphics.stanford.edu/~seander/bithacks.html + (Orignal code public domain) + + Comments shows how this works with 01100000000000000000000000001011 +*/ + +STATIC_INLINE uint32 my_round_up_to_next_power(uint32 v) +{ + v--; /* 01100000000000000000000000001010 */ + v|= v >> 1; /* 01110000000000000000000000001111 */ + v|= v >> 2; /* 01111100000000000000000000001111 */ + v|= v >> 4; /* 01111111110000000000000000001111 */ + v|= v >> 8; /* 01111111111111111100000000001111 */ + v|= v >> 16; /* 01111111111111111111111111111111 */ + return v+1; /* 10000000000000000000000000000000 */ +} + +STATIC_INLINE uint32 my_clear_highest_bit(uint32 v) +{ + uint32 w=v >> 1; + w|= w >> 1; + w|= w >> 2; + w|= w >> 4; + w|= w >> 8; + w|= w >> 16; + return v & w; +} + +STATIC_INLINE uint32 my_reverse_bits(uint key) +{ + return + (_my_bits_reverse_table[ key & 255] << 24) | + (_my_bits_reverse_table[(key>> 8) & 255] << 16) | + (_my_bits_reverse_table[(key>>16) & 255] << 8) | + _my_bits_reverse_table[(key>>24) ]; +} + +#else +extern uint my_bit_log2(ulong value); +extern uint32 my_round_up_to_next_power(uint32 v); +uint32 my_clear_highest_bit(uint32 v); +uint32 my_reverse_bits(uint key); +extern uint my_count_bits(ulonglong v); +extern uint my_count_bits_ushort(ushort v); +#endif diff --git a/include/my_global.h b/include/my_global.h index 21b2251af1d..563ef379252 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -192,6 +192,8 @@ #endif #undef inline_test_2 #undef inline_test_1 +/* helper macro for "instantiating" inline functions */ +#define STATIC_INLINE static inline /* The following macros are used to control inlining a bit more than diff --git a/include/my_sys.h b/include/my_sys.h index 2dc4053f70d..81e5380be96 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -216,6 +216,7 @@ extern int (*error_handler_hook)(uint my_err, const char *str,myf MyFlags); extern int (*fatal_error_handler_hook)(uint my_err, const char *str, myf MyFlags); extern uint my_file_limit; +extern ulong my_thread_stack_size; #ifdef HAVE_LARGE_PAGES extern my_bool my_use_large_pages; @@ -821,10 +822,6 @@ extern int packfrm(const void *, uint, const void **, uint *); extern int unpackfrm(const void **, uint *, const void *); extern ha_checksum my_checksum(ha_checksum crc, const byte *mem, uint count); -extern uint my_bit_log2(ulong value); -extern uint32 my_round_up_to_next_power(uint32 v); -extern uint my_count_bits(ulonglong v); -extern uint my_count_bits_ushort(ushort v); extern void my_sleep(ulong m_seconds); extern ulong crc32(ulong crc, const uchar *buf, uint len); extern uint my_set_max_open_files(uint files); @@ -840,7 +837,7 @@ extern int my_getncpus(); #ifndef MAP_NOSYNC #define MAP_NOSYNC 0 #endif -#ifndef MAP_NORESERVE +#ifndef MAP_NORESERVE #define MAP_NORESERVE 0 /* For irix and AIX */ #endif diff --git a/mysys/Makefile.am b/mysys/Makefile.am index b209d64e78f..d870437573a 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -31,7 +31,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ my_malloc.c my_realloc.c my_once.c mulalloc.c \ my_alloc.c safemalloc.c my_new.cc \ - my_vle.c my_atomic.c \ + my_vle.c my_atomic.c lf_hash.c \ + lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 00000000000..a9ea1802c03 --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,319 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + concurrent allocator based on pinning addresses + + strictly speaking it's not lock-free, as it can be blocked + if a thread's purgatory is full and all addresses from there + are pinned. + + But until the above happens, it's wait-free. + + It can be made strictly wait-free by increasing purgatory size. + If it's larger than pins_in_stack*LF_PINBOX_PINS, then apocalyptical + condition above will never happen. But than the memory requirements + will be O(pins_in_stack^2). + + Note, that for large purgatory sizes it makes sense to remove + purgatory array, and link objects in a list using embedded pointer. + + TODO test with more than 256 threads + TODO test w/o alloca +*/ + +#include +#include +#include + +#define LF_PINBOX_MAX_PINS 65536 + +static void _lf_pinbox_real_free(LF_PINS *pins); + +void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, + void *free_func_arg) +{ + DBUG_ASSERT(sizeof(LF_PINS) == 128); + lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); + pinbox->pinstack_top_ver=0; + pinbox->pins_in_stack=0; + pinbox->free_func=free_func; + pinbox->free_func_arg=free_func_arg; +} + +void lf_pinbox_end(LF_PINBOX *pinbox) +{ + lf_dynarray_end(&pinbox->pinstack); +} + +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) +{ + uint32 pins, next, top_ver; + LF_PINS *el; + + top_ver=pinbox->pinstack_top_ver; + do + { + if (!(pins=top_ver % LF_PINBOX_MAX_PINS)) + { + pins=my_atomic_add32(&pinbox->pins_in_stack, 1)+1; + el=(LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + break; + } + el=(LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + next=el->link; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins+next+LF_PINBOX_MAX_PINS)); + el->link=pins; + el->purgatory_count=0; + el->pinbox=pinbox; + return el; +} + +void _lf_pinbox_put_pins(LF_PINS *pins) +{ + LF_PINBOX *pinbox=pins->pinbox; + uint32 top_ver, nr; + nr=pins->link; +#ifdef MY_LF_EXTRA_DEBUG + { + int i; + for (i=0; i < LF_PINBOX_PINS; i++) + assert(pins->pin[i] == 0); + } +#endif + while (pins->purgatory_count) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count && my_getncpus() == 1) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + } + } + top_ver=pinbox->pinstack_top_ver; + if (nr == pinbox->pins_in_stack) + { + int32 tmp=nr; + if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) + goto ret; + } + + do + { + pins->link=top_ver % LF_PINBOX_MAX_PINS; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); +ret: + return; +} + +static int ptr_cmp(void **a, void **b) +{ + return *a < *b ? -1 : *a == *b ? 0 : 1; +} + +void _lf_pinbox_free(LF_PINS *pins, void *addr) +{ + while (pins->purgatory_count == LF_PURGATORY_SIZE) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count == LF_PURGATORY_SIZE && my_getncpus() == 1) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + } + } + pins->purgatory[pins->purgatory_count++]=addr; +} + +struct st_harvester { + void **granary; + int npins; +}; + +static int harvest_pins(LF_PINS *el, struct st_harvester *hv) +{ + int i; + LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) + *hv->granary++= p; + } + } + hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; +} + +static int match_pins(LF_PINS *el, void *addr) +{ + int i; + LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + for (i= 0; i < LF_PINBOX_PINS; i++) + if (el->pin[i] == addr) + return 1; + return 0; +} + +static void _lf_pinbox_real_free(LF_PINS *pins) +{ + int npins; + void **addr=0; + void **start, **cur, **end=pins->purgatory+pins->purgatory_count; + LF_PINBOX *pinbox=pins->pinbox; + + npins=pinbox->pins_in_stack+1; + +#ifdef HAVE_ALLOCA + /* create a sorted list of pinned addresses, to speed up searches */ + if (sizeof(void *)*LF_PINBOX_PINS*npins < my_thread_stack_size) + { + struct st_harvester hv; + addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); + hv.granary=addr; + hv.npins=npins; + _lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)harvest_pins, &hv); + + npins=hv.granary-addr; + if (npins) + qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); + } +#endif + + start= cur= pins->purgatory; + end= start+pins->purgatory_count; + for (; cur < end; cur++) + { + if (npins) + { + if (addr) + { + void **a,**b,**c; + for (a=addr, b=addr+npins-1, c=a+(b-a)/2; b-a>1; c=a+(b-a)/2) + if (*cur == *c) + a=b=c; + else if (*cur > *c) + a=c; + else + b=c; + if (*cur == *a || *cur == *b) + goto found; + } + else + { + if (_lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)match_pins, *cur)) + goto found; + } + } + /* not pinned - freeing */ + pinbox->free_func(*cur, pinbox->free_func_arg); + continue; +found: + /* pinned - keeping */ + *start++=*cur; + } + pins->purgatory_count=start-pins->purgatory; +#ifdef MY_LF_EXTRA_DEBUG + while (start < pins->purgatory + LF_PURGATORY_SIZE) + *start++=0; +#endif +} + +static void alloc_free(void *node, LF_ALLOCATOR *allocator) +{ + void *tmp; + tmp=allocator->top; + do + { + (*(void **)node)=tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && + LF_BACKOFF); +} + +LF_REQUIRE_PINS(1); +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator=(LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + void *node; + for (;;) + { + do + { + node=allocator->top; + _lf_pin(pins, 0, node); + } while (node !=allocator->top && LF_BACKOFF); + if (!node) + { + if (!(node=my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + goto ret; +#ifdef MY_LF_EXTRA_DEBUG + my_atomic_add32(&allocator->mallocs, 1); +#endif + goto ret; + } + if (my_atomic_casptr((void **)&allocator->top, + (void *)&node, *(void **)node)) + goto ret; + } +ret: + _lf_unpin(pins, 0); + return node; +} + +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) +{ + lf_pinbox_init(&allocator->pinbox, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top=0; + allocator->mallocs=0; + allocator->element_size=size; + DBUG_ASSERT(size >= (int)sizeof(void *)); +} + +void lf_alloc_end(LF_ALLOCATOR *allocator) +{ + void *el=allocator->top; + while (el) + { + void *tmp=*(void **)el; + my_free(el, MYF(0)); + el=tmp; + } + lf_pinbox_end(&allocator->pinbox); + allocator->top=0; +} + +/* + NOTE + this is NOT thread-safe !!! +*/ +uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) +{ + uint i; + void *node; + for (node=allocator->top, i=0; node; node=*(void **)node, i++) /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000000..dcf99163ca1 --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,186 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). + + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparce arrays. + + The number of elements is limited to 2^16 + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). + + Actually, it's wait-free, not lock-free ;-) +*/ + +#undef DBUG_OFF +#include +#include +#include +#include + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element=element_size; + my_atomic_rwlock_init(&array->lock); +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) return; + + if (level) + { + int i; + for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free((void *)alloc, MYF(0)); + } + else + my_free(alloc[-1], MYF(0)); +} + +void lf_dynarray_end(LF_DYNARRAY *array) +{ + int i; + for (i=0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); + my_atomic_rwlock_destroy(&array->lock); + bzero(array, sizeof(*array)); +} + +static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH +}; + +void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr=0; + int i; + + for (i=3; i > 0; i--) + { + if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + { + if (!ptr_ptr) + { + ptr_ptr=&array->level[i]; + idx-= dynarray_idxes_in_level[i]; + } + ptr=*ptr_ptr; + if (!ptr) + { + void *alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); + } + ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; + idx%= dynarray_idxes_in_level[i]; + } + } + if (!ptr_ptr) + ptr_ptr=&array->level[0]; + ptr=*ptr_ptr; + if (!ptr) + { + void *alloc, *data; + alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + max(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + /* reserve the space for free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]=alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc, MYF(0)); + } + return ptr + array->size_of_element * idx; +} + +void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr=0; + int i; + + for (i=3; i > 0; i--) + { + if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + { + if (!ptr_ptr) + { + ptr_ptr=&array->level[i]; + idx-= dynarray_idxes_in_level[i]; + } + ptr=*ptr_ptr; + if (!ptr) + return(NULL); + ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; + idx %= dynarray_idxes_in_level[i]; + } + } + if (!ptr_ptr) + ptr_ptr=&array->level[0]; + ptr=*ptr_ptr; + if (!ptr) + return(NULL); + return ptr + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res=recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i=0; i < LF_DYNARRAY_LEVELS; i++) + if ((res=recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c new file mode 100644 index 00000000000..6d3d30ebc3f --- /dev/null +++ b/mysys/lf_hash.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + extensible hash + + TODO + dummy nodes use element_size=0 + try to get rid of dummy nodes ? +*/ +#include +#include +#include +#include + +LF_REQUIRE_PINS(3); + +typedef struct { + intptr volatile link; + uint32 hashnr; + const uchar *key; + uint keylen; +} LF_SLIST; + +typedef struct { + intptr volatile *prev; + LF_SLIST *curr, *next; +} CURSOR; + +#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + RETURN + 0 - not found + 1 - found + + NOTE + cursor is positioned in either case + pins[0..2] are used, they are NOT removed on return +*/ +static int lfind(intptr volatile *head, uint32 hashnr, + const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) +{ + uint32 cur_hashnr; + const uchar *cur_key; + uint cur_keylen; + intptr link; + +retry: + cursor->prev=head; + do { + cursor->curr=PTR(*cursor->prev); + _lf_pin(pins,1,cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + return 0; + do { // XXX or goto retry ? + link=cursor->curr->link; + cursor->next=PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr=cursor->curr->hashnr; + cur_key=cursor->curr->key; + cur_keylen=cursor->curr->keylen; + if (*cursor->prev != (intptr)cursor->curr) + { + LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr >= hashnr) + { + int r=1; + if (cur_hashnr > hashnr || (r=memcmp(cur_key, key, keylen)) >= 0) + return !r; + } + cursor->prev=&(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + LF_BACKOFF; + goto retry; + } + } + cursor->curr=cursor->next; + _lf_pin(pins, 1, cursor->curr); + } +} + +/* + RETURN + 0 - inserted + not 0 - a pointer to a conflict + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, + LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res=-1; + + do + { + if (lfind((intptr*)head, node->hashnr, node->key, node->keylen, + &cursor, pins) && + (flags & LF_HASH_UNIQUE)) + res=0; + else + { + node->link=(intptr)cursor.curr; + assert(node->link != (intptr)node); + assert(cursor.prev != &node->link); + if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + res=1; + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res ? 0 : cursor.curr; +} + +/* + RETURN + 0 - ok + 1 - not found + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, + const uchar *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res=-1; + + do + { + if (!lfind((intptr *)head, hashnr, key, keylen, &cursor, pins)) + res= 1; + else + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + res= 0; + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res; +} + +/* + RETURN + 0 - not found + node - found + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *lsearch(LF_SLIST * volatile *head, uint32 hashnr, + const uchar *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res=lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + if (res) _lf_pin(pins, 2, cursor.curr); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + return res ? cursor.curr : 0; +} + +static inline const uchar* hash_key(const LF_HASH *hash, + const uchar *record, uint *length) +{ + if (hash->get_key) + return (*hash->get_key)(record,length,0); + *length=hash->key_length; + return record + hash->key_offset; +} + +static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) +{ + ulong nr1=1, nr2=4; + hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2); + return nr1 & INT_MAX32; +} + +#define MAX_LOAD 1 +static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); + +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc,sizeof(LF_SLIST)+element_size); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); + hash->size=1; + hash->count=0; + hash->element_size=element_size; + hash->flags=flags; + hash->charset=charset; + hash->key_offset=key_offset; + hash->key_length=key_length; + hash->get_key=get_key; + DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); +} + +void lf_hash_end(LF_HASH *hash) +{ + LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + while (el) + { + intptr next=el->link; + lf_alloc_real_free(&hash->alloc, el); + el=(LF_SLIST *)next; + } + lf_alloc_end(&hash->alloc); + lf_dynarray_end(&hash->array); +} + +/* + NOTE + see linsert() for pin usage +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + uint csize, bucket, hashnr, keylen; + LF_SLIST *node, * volatile *el; + const uchar *key; + + key= hash_key(hash, data, &keylen); + hashnr= calc_hash(hash, key, keylen); + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + node=(LF_SLIST *)_lf_alloc_new(pins); + memcpy(node+1, data, hash->element_size); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + node->hashnr=my_reverse_bits(hashnr) | 1; + node->key=((char *)(node+1))+(key-(uchar *)data); + node->keylen=keylen; + if (linsert(el, node, pins, hash->flags)) + { + _lf_alloc_free(pins, node); + lf_unlock_by_pins(pins); + return 0; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); +#if 0 + node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + hashnr=0; + while (node) + { + assert (node->hashnr >= hashnr); + hashnr=node->hashnr; + node=(LF_SLIST *)node->link; + } +#endif + lf_unlock_by_pins(pins); + return 1; +} + +/* + NOTE + see ldelete() for pin usage +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +{ + LF_SLIST * volatile *el; + uint bucket, hashnr=calc_hash(hash, key, keylen); + + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + if (ldelete(el, my_reverse_bits(hashnr) | 1, key, keylen, pins)) + { + lf_unlock_by_pins(pins); + return 0; + } + my_atomic_add32(&hash->count, -1); +#if 0 + { + LF_SLIST *node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + hashnr=0; + while (node) + { + assert (node->hashnr >= hashnr); + hashnr=node->hashnr; + node=(LF_SLIST *)node->link; + } + } +#endif + lf_unlock_by_pins(pins); + return 1; +} + +/* + NOTE + see lsearch() for pin usage +*/ +int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +{ + int res; + LF_SLIST * volatile *el; + uint bucket, hashnr=calc_hash(hash, key, keylen); + + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + res=NULL != lsearch(el, my_reverse_bits(hashnr) | 1, key, keylen, pins); + lf_unlock_by_pins(pins); + return res; +} + +static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy=_lf_alloc_new(pins), **tmp=0, *cur; + LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); + if (*el == NULL && bucket) + initialize_bucket(hash, el, parent, pins); + dummy->hashnr=my_reverse_bits(bucket); + LINT_INIT(dummy->key); + LINT_INIT(dummy->keylen); + if ((cur= linsert(el, dummy, pins, 0))) + { + _lf_alloc_free(pins, dummy); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} + diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index e6f4348968f..9a99a278bc5 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -43,6 +43,7 @@ #include #include "my_static.h" #include +#include #include #include diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 6f56d76e6b8..2908a44961a 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -18,11 +18,10 @@ #include #ifndef HAVE_INLINE -/* - the following will cause all inline functions to be instantiated -*/ +/* the following will cause all inline functions to be instantiated */ #define HAVE_INLINE -#define static extern +#undef STATIC_INLINE +#define STATIC_INLINE extern #endif #include diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 6ef0e171695..11d98f5f6ae 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -14,23 +14,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Some useful bit functions */ +#include -#include "mysys_priv.h" +#ifndef HAVE_INLINE +/* the following will cause all inline functions to be instantiated */ +#define HAVE_INLINE +#undef STATIC_INLINE +#define STATIC_INLINE extern +#endif -/* - Find smallest X in 2^X >= value - This can be used to divide a number with value by doing a shift instead -*/ +#include -uint my_bit_log2(ulong value) -{ - uint bit; - for (bit=0 ; value > 1 ; value>>=1, bit++) ; - return bit; -} - -static char nbits[256] = { +const char _my_bits_nbits[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -49,60 +44,29 @@ static char nbits[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -uint my_count_bits(ulonglong v) -{ -#if SIZEOF_LONG_LONG > 4 - /* The following code is a bit faster on 16 bit machines than if we would - only shift v */ - ulong v2=(ulong) (v >> 32); - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)] + - nbits[(uchar) (v2)] + - nbits[(uchar) (v2 >> 8)] + - nbits[(uchar) (v2 >> 16)] + - nbits[(uchar) (v2 >> 24)]); -#else - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)]); -#endif -} - -uint my_count_bits_ushort(ushort v) -{ - return nbits[v]; -} - - /* - Next highest power of two - - SYNOPSIS - my_round_up_to_next_power() - v Value to check - - RETURN - Next or equal power of 2 - Note: 0 will return 0 - - NOTES - Algorithm by Sean Anderson, according to: - http://graphics.stanford.edu/~seander/bithacks.html - (Orignal code public domain) - - Comments shows how this works with 01100000000000000000000000001011 + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' */ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; -uint32 my_round_up_to_next_power(uint32 v) -{ - v--; /* 01100000000000000000000000001010 */ - v|= v >> 1; /* 01110000000000000000000000001111 */ - v|= v >> 2; /* 01111100000000000000000000001111 */ - v|= v >> 4; /* 01111111110000000000000000001111 */ - v|= v >> 8; /* 01111111111111111100000000001111 */ - v|= v >> 16; /* 01111111111111111111111111111111 */ - return v+1; /* 10000000000000000000000000000000 */ -} diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 2c85ce0bf04..8cf8ac0e97c 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -39,6 +39,7 @@ #include "mysys_priv.h" #include #include +#include void create_last_word_mask(MY_BITMAP *map) { diff --git a/mysys/my_init.c b/mysys/my_init.c index dca68637161..a1dca635054 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -44,6 +44,7 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ +ulong my_thread_stack_size= 65536; static ulong atoi_octal(const char *str) { diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 2d097c34f97..983ebdc2ab8 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -21,6 +21,7 @@ #include "mysql_priv.h" #include +#include #include #include "ha_myisam.h" #include diff --git a/sql/item_func.cc b/sql/item_func.cc index 8139ba81777..190d4091beb 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include "sp_head.h" #include "sp_rcontext.h" diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index c0b453b7d69..527c8fb513f 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1497,7 +1497,7 @@ extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong slave_net_timeout, slave_trans_retries; extern uint max_user_connections; extern ulong what_to_log,flush_time; -extern ulong query_buff_size, thread_stack; +extern ulong query_buff_size; extern ulong max_prepared_stmt_count, prepared_stmt_count; extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit; extern ulong max_binlog_size, max_relay_log_size; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 6e57993a61a..279ddc2c5d5 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -17,6 +17,7 @@ #include "mysql_priv.h" #include #include +#include #include "slave.h" #include "sql_repl.h" #include "rpl_filter.h" @@ -480,7 +481,7 @@ uint volatile thread_count, thread_running; ulonglong thd_startup_options; ulong back_log, connect_timeout, concurrency, server_id; ulong table_cache_size, table_def_size; -ulong thread_stack, what_to_log; +ulong what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong slave_net_timeout, slave_trans_retries; @@ -2142,7 +2143,7 @@ the thread stack. Please read http://www.mysql.com/doc/en/Linux.html\n\n", { fprintf(stderr,"thd=%p\n",thd); print_stacktrace(thd ? (gptr) thd->thread_stack : (gptr) 0, - thread_stack); + my_thread_stack_size); } if (thd) { @@ -2274,9 +2275,9 @@ static void start_signal_handler(void) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&thr_attr,thread_stack*2); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&thr_attr,thread_stack); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size); #endif #endif @@ -3493,9 +3494,9 @@ int main(int argc, char **argv) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&connection_attrib,thread_stack*2); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&connection_attrib,thread_stack); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size); #endif #ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE { @@ -3506,15 +3507,15 @@ int main(int argc, char **argv) stack_size/= 2; #endif /* We must check if stack_size = 0 as Solaris 2.9 can return 0 here */ - if (stack_size && stack_size < thread_stack) + if (stack_size && stack_size < my_thread_stack_size) { if (global_system_variables.log_warnings) sql_print_warning("Asked for %ld thread stack, but got %ld", - thread_stack, stack_size); + my_thread_stack_size, stack_size); #if defined(__ia64__) || defined(__ia64) - thread_stack= stack_size*2; + my_thread_stack_size= stack_size*2; #else - thread_stack= stack_size; + my_thread_stack_size= stack_size; #endif } } @@ -6276,8 +6277,8 @@ The minimum value for this variable is 4096.", (gptr*) &concurrency, (gptr*) &concurrency, 0, GET_ULONG, REQUIRED_ARG, DEFAULT_CONCURRENCY, 1, 512, 0, 1, 0}, {"thread_stack", OPT_THREAD_STACK, - "The stack size for each thread.", (gptr*) &thread_stack, - (gptr*) &thread_stack, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, + "The stack size for each thread.", (gptr*) &my_thread_stack_size, + (gptr*) &my_thread_stack_size, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, 1024L*128L, ~0L, 0, 1024, 0}, { "time_format", OPT_TIME_FORMAT, "The TIME format (for future).", diff --git a/sql/set_var.cc b/sql/set_var.cc index b0ecc7eccef..6c859d6bda9 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -1037,10 +1037,10 @@ SHOW_VAR init_vars[]= { #ifdef HAVE_THR_SETCONCURRENCY {"thread_concurrency", (char*) &concurrency, SHOW_LONG}, #endif - {"thread_stack", (char*) &thread_stack, SHOW_LONG}, + {"thread_stack", (char*) &my_thread_stack_size, SHOW_LONG}, {sys_time_format.name, (char*) &sys_time_format, SHOW_SYS}, {"time_zone", (char*) &sys_time_zone, SHOW_SYS}, - {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS}, + {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS}, {sys_tmp_table_size.name, (char*) &sys_tmp_table_size, SHOW_SYS}, {sys_tmpdir.name, (char*) &sys_tmpdir, SHOW_SYS}, {sys_trans_alloc_block_size.name, (char*) &sys_trans_alloc_block_size, diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 232df095816..9015470f11c 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5740,10 +5740,10 @@ bool check_stack_overrun(THD *thd, long margin, long stack_used; DBUG_ASSERT(thd == current_thd); if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >= - (long) (thread_stack - margin)) + (long) (my_thread_stack_size - margin)) { sprintf(errbuff[0],ER(ER_STACK_OVERRUN_NEED_MORE), - stack_used,thread_stack,margin); + stack_used,my_thread_stack_size,margin); my_message(ER_STACK_OVERRUN_NEED_MORE,errbuff[0],MYF(0)); thd->fatal_error(); return 1; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index d34ff070eb1..f32a0d1fba3 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -26,6 +26,7 @@ #include "sql_cursor.h" #include +#include #include #include diff --git a/sql/sql_test.cc b/sql/sql_test.cc index b28aa4a1ce5..77bbee0bf69 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -455,7 +455,7 @@ void mysql_print_status() VOID(my_getwd(current_dir, sizeof(current_dir),MYF(0))); printf("Current dir: %s\n", current_dir); printf("Running threads: %d Stack size: %ld\n", thread_count, - (long) thread_stack); + (long) my_thread_stack_size); thr_print_locks(); // Write some debug info #ifndef DBUG_OFF print_cached_tables(); @@ -530,7 +530,7 @@ Estimated memory (with thread stack): %ld\n", (int) info.uordblks, (int) info.fordblks, (int) info.keepcost, - (long) (thread_count * thread_stack + info.hblkhd + info.arena)); + (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena)); #endif puts(""); } diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index 22cbde278be..1f45b2ce070 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -18,6 +18,7 @@ #include "ftdefs.h" #include "sp_defs.h" +#include #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -430,7 +431,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.state.rec_per_key_part[key_segs-1]=1L; length+=key_length; /* Get block length for key, if defined by user */ - block_length= (keydef->block_length ? + block_length= (keydef->block_length ? my_round_up_to_next_power(keydef->block_length) : myisam_block_size); block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH); diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 357128b7a40..498b433f846 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -27,6 +27,7 @@ #endif #include "myisamdef.h" #include +#include #define STANDARD_LENGTH 37 #define MYISAM_KEYS 6 diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index 7fcfb8fc65a..3a688613a4d 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef HAVE_SYS_VADVICE_H #include #endif diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 71408ce957f..1e7206441eb 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -14,13 +14,20 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include +//#define MY_ATOMIC_MODE_RWLOCKS +//#define MY_ATOMIC_MODE_DUMMY + #include + +#include #include #include +#include -int32 a32,b32,c32; +volatile uint32 a32,b32,c32; my_atomic_rwlock_t rwl; +LF_ALLOCATOR lf_allocator; +LF_HASH lf_hash; pthread_attr_t thr_attr; pthread_mutex_t mutex; @@ -30,9 +37,9 @@ int N; /* add and sub a random number in a loop. Must get 0 at the end */ pthread_handler_t test_atomic_add_handler(void *arg) { - int m=*(int *)arg; + int m=(*(int *)arg)/2; int32 x; - for (x=((int)(&m)); m ; m--) + for (x=((int)(intptr)(&m)); m ; m--) { x=x*m+0x87654321; my_atomic_rwlock_wrlock(&rwl); @@ -61,15 +68,9 @@ pthread_handler_t test_atomic_add_handler(void *arg) pthread_handler_t test_atomic_swap_handler(void *arg) { int m=*(int *)arg; - int32 x; + uint32 x=my_atomic_add32(&b32, 1); - my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_add32(&b32, 1); - my_atomic_rwlock_wrunlock(&rwl); - - my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, x); - my_atomic_rwlock_wrunlock(&rwl); for (; m ; m--) { @@ -98,29 +99,29 @@ pthread_handler_t test_atomic_swap_handler(void *arg) /* same as test_atomic_add_handler, but my_atomic_add32 is emulated with - (slower) my_atomic_cas32 + my_atomic_cas32 - notice that the slowdown is proportional to the + number of CPUs */ pthread_handler_t test_atomic_cas_handler(void *arg) { - int m=*(int *)arg, ok; - int32 x,y; - for (x=((int)(&m)); m ; m--) + int m=(*(int *)arg)/2, ok=0; + int32 x, y; + for (x=((int)(intptr)(&m)); m ; m--) { my_atomic_rwlock_wrlock(&rwl); y=my_atomic_load32(&a32); my_atomic_rwlock_wrunlock(&rwl); - x=x*m+0x87654321; do { my_atomic_rwlock_wrlock(&rwl); ok=my_atomic_cas32(&a32, &y, y+x); my_atomic_rwlock_wrunlock(&rwl); - } while (!ok); + } while (!ok) ; do { my_atomic_rwlock_wrlock(&rwl); ok=my_atomic_cas32(&a32, &y, y-x); my_atomic_rwlock_wrunlock(&rwl); - } while (!ok); + } while (!ok) ; } pthread_mutex_lock(&mutex); N--; @@ -129,6 +130,128 @@ pthread_handler_t test_atomic_cas_handler(void *arg) return 0; } +/* + pin allocator - alloc and release an element in a loop +*/ +pthread_handler_t test_lf_pinbox(void *arg) +{ + int m=*(int *)arg; + int32 x=0; + LF_PINS *pins; + + pins=lf_pinbox_get_pins(&lf_allocator.pinbox); + + for (x=((int)(intptr)(&m)); m ; m--) + { + lf_pinbox_put_pins(pins); + pins=lf_pinbox_get_pins(&lf_allocator.pinbox); + } + lf_pinbox_put_pins(pins); + pthread_mutex_lock(&mutex); + N--; + if (!N) + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + return 0; +} + +typedef union { + int32 data; + void *not_used; /* to guarantee sizeof(TLA) >= sizeof(void *) */ +} TLA; + +pthread_handler_t test_lf_alloc(void *arg) +{ + int m=(*(int *)arg)/2; + int32 x,y=0; + LF_PINS *pins; + + pins=lf_alloc_get_pins(&lf_allocator); + + for (x=((int)(intptr)(&m)); m ; m--) + { + TLA *node1, *node2; + x=x*m+0x87654321; + node1=(TLA *)lf_alloc_new(pins); + node1->data=x; + y+=node1->data; + node1->data=0; + node2=(TLA *)lf_alloc_new(pins); + node2->data=x; + y-=node2->data; + node2->data=0; + lf_alloc_free(pins, node1); + lf_alloc_free(pins, node2); + } + lf_alloc_put_pins(pins); + my_atomic_rwlock_wrlock(&rwl); + my_atomic_add32(&a32, y); + my_atomic_rwlock_wrunlock(&rwl); + pthread_mutex_lock(&mutex); + N--; + if (!N) + { + diag("%d mallocs, %d pins in stack", + lf_allocator.mallocs, lf_allocator.pinbox.pins_in_stack); +#ifdef MY_LF_EXTRA_DEBUG + a32|=lf_allocator.mallocs - lf_alloc_in_pool(&lf_allocator); +#endif + pthread_cond_signal(&cond); + } + pthread_mutex_unlock(&mutex); + return 0; +} + +#define N_TLH 1000 +pthread_handler_t test_lf_hash(void *arg) +{ + int m=(*(int *)arg)/(2*N_TLH); + int32 x,y,z,sum=0, ins=0; + LF_PINS *pins; + + pins=lf_hash_get_pins(&lf_hash); + + for (x=((int)(intptr)(&m)); m ; m--) + { + int i; + y=x; + for (i=0; i < N_TLH; i++) + { + x=x*(m+i)+0x87654321; + z=(x<0) ? -x : x; + if (lf_hash_insert(&lf_hash, pins, &z)) + { + sum+=z; + ins++; + } + } + for (i=0; i < N_TLH; i++) + { + y=y*(m+i)+0x87654321; + z=(y<0) ? -y : y; + if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z))) + sum-=z; + } + } + lf_hash_put_pins(pins); + my_atomic_rwlock_wrlock(&rwl); + my_atomic_add32(&a32, sum); + my_atomic_add32(&b32, ins); + my_atomic_rwlock_wrunlock(&rwl); + pthread_mutex_lock(&mutex); + N--; + if (!N) + { + diag("%d mallocs, %d pins in stack, %d hash size, %d inserts", + lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_stack, + lf_hash.size, b32); + a32|=lf_hash.count; + pthread_cond_signal(&cond); + } + pthread_mutex_unlock(&mutex); + return 0; +} + void test_atomic(const char *test, pthread_handler handler, int n, int m) { pthread_t t; @@ -141,23 +264,24 @@ void test_atomic(const char *test, pthread_handler handler, int n, int m) diag("Testing %s with %d threads, %d iterations... ", test, n, m); for (N=n ; n ; n--) pthread_create(&t, &thr_attr, handler, &m); - pthread_mutex_lock(&mutex); while (N) pthread_cond_wait(&cond, &mutex); pthread_mutex_unlock(&mutex); now=my_getsystime()-now; - ok(a32 == 0, "tested %s in %g secs", test, ((double)now)/1e7); + ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32); } int main() { int err; - diag("N CPUs: %d", my_getncpus()); + my_init(); + + diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE); err= my_atomic_initialize(); - plan(4); + plan(7); ok(err == 0, "my_atomic_initialize() returned %d", err); pthread_attr_init(&thr_attr); @@ -165,15 +289,31 @@ int main() pthread_mutex_init(&mutex, 0); pthread_cond_init(&cond, 0); my_atomic_rwlock_init(&rwl); + lf_alloc_init(&lf_allocator, sizeof(TLA)); + lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0, + &my_charset_bin); - test_atomic("my_atomic_add32", test_atomic_add_handler, 100,1000000); - test_atomic("my_atomic_swap32", test_atomic_swap_handler, 100,1000000); - test_atomic("my_atomic_cas32", test_atomic_cas_handler, 100,1000000); +#ifdef MY_ATOMIC_MODE_RWLOCKS +#define CYCLES 10000 +#else +#define CYCLES 1000000 +#endif +#define THREADS 100 + test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES); + test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS,CYCLES); + test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES); + test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES); + test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); + test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES); + + lf_hash_end(&lf_hash); + lf_alloc_end(&lf_allocator); pthread_mutex_destroy(&mutex); pthread_cond_destroy(&cond); pthread_attr_destroy(&thr_attr); my_atomic_rwlock_destroy(&rwl); + my_end(0); return exit_status(); } From d1a8a2c7345847fc750c2d36908913dedb052c9c Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Aug 2006 01:16:51 +0200 Subject: [PATCH 17/95] fixes for inline bit functions --- sql/ha_maria.cc | 1 + storage/maria/ma_create.c | 1 + storage/maria/ma_test2.c | 1 + storage/maria/maria_chk.c | 1 + 4 files changed, 4 insertions(+) diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc index 29718f1493e..b883d56e3f7 100644 --- a/sql/ha_maria.cc +++ b/sql/ha_maria.cc @@ -22,6 +22,7 @@ #include "mysql_priv.h" #include #include +#include #include "ha_maria.h" #ifndef MASTER #include "../srclib/maria/maria_def.h" diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index b9fb4eb0d5b..5926bba9406 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -18,6 +18,7 @@ #include "ma_ftdefs.h" #include "ma_sp_defs.h" +#include #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 840ecb2eeb7..40f7d2aaffb 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -27,6 +27,7 @@ #endif #include "maria_def.h" #include +#include #define STANDARD_LENGTH 37 #define MARIA_KEYS 6 diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index e423a3f5c36..89858ee2d07 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -18,6 +18,7 @@ #include "ma_fulltext.h" #include +#include #include #include #include From 74d050d000ff9db79e36931988386fe7988f8dd2 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 Aug 2006 15:20:58 +0200 Subject: [PATCH 18/95] maria transaction manager with unit tests include/lf.h: few lf API changes mysys/lf_alloc-pin.c: few lf API changes mysys/lf_dynarray.c: few lf API changes mysys/lf_hash.c: few lf API changes storage/maria/Makefile.am: transaction manager unittest/Makefile.am: maria transaction manager unittest/mysys/my_atomic-t.c: ensure that values are positive storage/maria/trxman.h: New BitKeeper file ``storage/maria/trxman.h'' unittest/maria/Makefile.am: New BitKeeper file ``unittest/maria/Makefile.am'' unittest/maria/trxman-t.c: New BitKeeper file ``unittest/maria/trxman-t.c'' storage/maria/trxman.c: comment clarified --- configure.in | 2 +- include/lf.h | 16 +-- mysys/lf_alloc-pin.c | 8 +- mysys/lf_dynarray.c | 2 +- mysys/lf_hash.c | 78 ++++------- storage/maria/Makefile.am | 6 +- storage/maria/trxman.c | 261 +++++++++++++++++++++++++++++++++++ storage/maria/trxman.h | 28 ++++ unittest/Makefile.am | 4 +- unittest/maria/Makefile.am | 12 ++ unittest/maria/trxman-t.c | 138 ++++++++++++++++++ unittest/mysys/my_atomic-t.c | 17 +-- 12 files changed, 495 insertions(+), 77 deletions(-) create mode 100644 storage/maria/trxman.c create mode 100644 storage/maria/trxman.h create mode 100644 unittest/maria/Makefile.am create mode 100644 unittest/maria/trxman-t.c diff --git a/configure.in b/configure.in index e984f1524da..d518760ca74 100644 --- a/configure.in +++ b/configure.in @@ -2537,7 +2537,7 @@ AC_SUBST(MAKE_BINARY_DISTRIBUTION_OPTIONS) # Output results AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl - unittest/mysys/Makefile unittest/examples/Makefile dnl + unittest/mysys/Makefile unittest/examples/Makefile unittest/maria/Makefile dnl strings/Makefile regex/Makefile storage/Makefile dnl man/Makefile BUILD/Makefile vio/Makefile dnl libmysql/Makefile client/Makefile dnl diff --git a/include/lf.h b/include/lf.h index 299b4e07aa5..6a5047f6052 100644 --- a/include/lf.h +++ b/include/lf.h @@ -66,7 +66,7 @@ typedef struct { typedef int (*lf_dynarray_func)(void *, void *); void lf_dynarray_init(LF_DYNARRAY *array, uint element_size); -void lf_dynarray_end(LF_DYNARRAY *array); +void lf_dynarray_destroy(LF_DYNARRAY *array); nolock_wrap(lf_dynarray_nr, int, (LF_DYNARRAY *array, void *el), @@ -139,15 +139,15 @@ typedef struct { #define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL) #define lf_pin(PINS, PIN, ADDR) \ do { \ - lf_lock_pins(PINS); \ + lf_lock_by_pins(PINS); \ _lf_pin(PINS, PIN, ADDR); \ - lf_unlock_pins(PINS); \ + lf_unlock_by_pins(PINS); \ } while (0) #define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, void * free_func_arg); -void lf_pinbox_end(LF_PINBOX *pinbox); +void lf_pinbox_destroy(LF_PINBOX *pinbox); lock_wrap(lf_pinbox_get_pins, LF_PINS *, (LF_PINBOX *pinbox), @@ -180,7 +180,7 @@ typedef struct st_lf_allocator { } LF_ALLOCATOR; void lf_alloc_init(LF_ALLOCATOR *allocator, uint size); -void lf_alloc_end(LF_ALLOCATOR *allocator); +void lf_alloc_destroy(LF_ALLOCATOR *allocator); uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); #define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) #define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) @@ -216,10 +216,10 @@ typedef struct { void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset); -void lf_hash_end(LF_HASH *hash); +void lf_hash_destroy(LF_HASH *hash); int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); -int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen); -int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen); +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); #define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc) #define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc) #define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index a9ea1802c03..cf1612b73d1 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -54,9 +54,9 @@ void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, pinbox->free_func_arg=free_func_arg; } -void lf_pinbox_end(LF_PINBOX *pinbox) +void lf_pinbox_destroy(LF_PINBOX *pinbox) { - lf_dynarray_end(&pinbox->pinstack); + lf_dynarray_destroy(&pinbox->pinstack); } LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) @@ -292,7 +292,7 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) DBUG_ASSERT(size >= (int)sizeof(void *)); } -void lf_alloc_end(LF_ALLOCATOR *allocator) +void lf_alloc_destroy(LF_ALLOCATOR *allocator) { void *el=allocator->top; while (el) @@ -301,7 +301,7 @@ void lf_alloc_end(LF_ALLOCATOR *allocator) my_free(el, MYF(0)); el=tmp; } - lf_pinbox_end(&allocator->pinbox); + lf_pinbox_destroy(&allocator->pinbox); allocator->top=0; } diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index dcf99163ca1..0fa04ab095c 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -57,7 +57,7 @@ static void recursive_free(void **alloc, int level) my_free(alloc[-1], MYF(0)); } -void lf_dynarray_end(LF_DYNARRAY *array) +void lf_dynarray_destroy(LF_DYNARRAY *array) { int i; for (i=0; i < LF_DYNARRAY_LEVELS; i++) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 6d3d30ebc3f..57936ea7b1f 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -228,14 +228,14 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, hash->count=0; hash->element_size=element_size; hash->flags=flags; - hash->charset=charset; + hash->charset=charset ? charset : &my_charset_bin; hash->key_offset=key_offset; hash->key_length=key_length; hash->get_key=get_key; DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); } -void lf_hash_end(LF_HASH *hash) +void lf_hash_destroy(LF_HASH *hash) { LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); while (el) @@ -244,109 +244,89 @@ void lf_hash_end(LF_HASH *hash) lf_alloc_real_free(&hash->alloc, el); el=(LF_SLIST *)next; } - lf_alloc_end(&hash->alloc); - lf_dynarray_end(&hash->array); + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); } /* + RETURN + 0 - inserted + 1 - didn't (unique key conflict) NOTE - see linsert() for pin usage + see linsert() for pin usage notes */ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) { - uint csize, bucket, hashnr, keylen; + uint csize, bucket, hashnr; LF_SLIST *node, * volatile *el; - const uchar *key; - key= hash_key(hash, data, &keylen); - hashnr= calc_hash(hash, key, keylen); - bucket= hashnr % hash->size; lf_lock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); + node->key= hash_key(hash, node+1, &node->keylen); + hashnr= calc_hash(hash, node->key, node->keylen); + bucket= hashnr % hash->size; el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); node->hashnr=my_reverse_bits(hashnr) | 1; - node->key=((char *)(node+1))+(key-(uchar *)data); - node->keylen=keylen; if (linsert(el, node, pins, hash->flags)) { _lf_alloc_free(pins, node); lf_unlock_by_pins(pins); - return 0; + return 1; } csize= hash->size; if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) my_atomic_cas32(&hash->size, &csize, csize*2); -#if 0 - node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); - hashnr=0; - while (node) - { - assert (node->hashnr >= hashnr); - hashnr=node->hashnr; - node=(LF_SLIST *)node->link; - } -#endif lf_unlock_by_pins(pins); - return 1; + return 0; } /* + RETURN + 0 - deleted + 1 - didn't (not found) NOTE - see ldelete() for pin usage + see ldelete() for pin usage notes */ -int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, key, keylen); + uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_lock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - if (ldelete(el, my_reverse_bits(hashnr) | 1, key, keylen, pins)) + if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) { lf_unlock_by_pins(pins); - return 0; + return 1; } my_atomic_add32(&hash->count, -1); -#if 0 - { - LF_SLIST *node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); - hashnr=0; - while (node) - { - assert (node->hashnr >= hashnr); - hashnr=node->hashnr; - node=(LF_SLIST *)node->link; - } - } -#endif lf_unlock_by_pins(pins); - return 1; + return 0; } /* NOTE - see lsearch() for pin usage + see lsearch() for pin usage notes */ -int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { - int res; - LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, key, keylen); + LF_SLIST * volatile *el, *found; + uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_lock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - res=NULL != lsearch(el, my_reverse_bits(hashnr) | 1, key, keylen, pins); + found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); lf_unlock_by_pins(pins); - return res; + return found+1; } static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index d4315b4d446..cacfa5dbcdc 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -28,7 +28,9 @@ bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) maria_pack_DEPENDENCIES=$(LIBRARIES) noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test -noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h +noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ + ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ + ma_ft_eval.h trxman.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test2_DEPENDENCIES= $(LIBRARIES) ma_test3_DEPENDENCIES= $(LIBRARIES) @@ -53,7 +55,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_ft_update.c ma_ft_boolean_search.c \ ma_ft_nlq_search.c ft_maria.c ma_sort.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ - ma_sp_key.c + ma_sp_key.c trxman.c CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? DEFS = diff --git a/storage/maria/trxman.c b/storage/maria/trxman.c new file mode 100644 index 00000000000..3c64d93183a --- /dev/null +++ b/storage/maria/trxman.c @@ -0,0 +1,261 @@ + +#include +#include +#include +#include "trxman.h" + +TRX active_list_min, active_list_max, + committed_list_min, committed_list_max, *pool; + +pthread_mutex_t LOCK_trx_list; +uint active_transactions; +TrID global_trid_generator; + +TRX **short_id_to_trx; +my_atomic_rwlock_t LOCK_short_id_to_trx; + +LF_HASH trid_to_trx; + +static byte *trx_get_hash_key(const byte *trx,uint* len, my_bool unused) +{ + *len= sizeof(TrID); + return (byte *) & ((*((TRX **)trx))->trid); +} + +int trxman_init() +{ + pthread_mutex_init(&LOCK_trx_list, MY_MUTEX_INIT_FAST); + active_list_max.trid= active_list_min.trid= 0; + active_list_max.min_read_from=~0; + active_list_max.next= active_list_min.prev= 0; + active_list_max.prev= &active_list_min; + active_list_min.next= &active_list_max; + active_transactions= 0; + + committed_list_max.commit_trid= ~0; + committed_list_max.next= committed_list_min.prev= 0; + committed_list_max.prev= &committed_list_min; + committed_list_min.next= &committed_list_max; + + pool=0; + global_trid_generator=0; /* set later by recovery code */ + lf_hash_init(&trid_to_trx, sizeof(TRX*), LF_HASH_UNIQUE, + 0, 0, trx_get_hash_key, 0); + my_atomic_rwlock_init(&LOCK_short_id_to_trx); + short_id_to_trx=(TRX **)my_malloc(SHORT_ID_MAX*sizeof(TRX*), + MYF(MY_WME|MY_ZEROFILL)); + if (!short_id_to_trx) + return 1; + short_id_to_trx--; /* min short_id is 1 */ + + return 0; +} + +int trxman_destroy() +{ + DBUG_ASSERT(trid_to_trx.count == 0); + DBUG_ASSERT(active_transactions == 0); + DBUG_ASSERT(active_list_max.prev == &active_list_min); + DBUG_ASSERT(active_list_min.next == &active_list_max); + DBUG_ASSERT(committed_list_max.prev == &committed_list_min); + DBUG_ASSERT(committed_list_min.next == &committed_list_max); + while (pool) + { + TRX *tmp=pool->next; + my_free(pool, MYF(0)); + pool=tmp; + } + lf_hash_destroy(&trid_to_trx); + pthread_mutex_destroy(&LOCK_trx_list); + my_atomic_rwlock_destroy(&LOCK_short_id_to_trx); + my_free((void *)(short_id_to_trx+1), MYF(0)); +} + +static TrID new_trid() +{ + DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); + safe_mutex_assert_owner(&LOCK_trx_list); + return ++global_trid_generator; +} + +static void set_short_id(TRX *trx) +{ + int i= (global_trid_generator + (intptr)trx) * 312089 % SHORT_ID_MAX; + my_atomic_rwlock_wrlock(&LOCK_short_id_to_trx); + for ( ; ; i= i % SHORT_ID_MAX + 1) /* the range is [1..SHORT_ID_MAX] */ + { + void *tmp=NULL; + if (short_id_to_trx[i] == NULL && + my_atomic_casptr((void **)&short_id_to_trx[i], &tmp, trx)) + break; + } + my_atomic_rwlock_wrunlock(&LOCK_short_id_to_trx); + trx->short_id= i; +} + +extern int global_malloc; +TRX *trxman_new_trx() +{ + TRX *trx; + + my_atomic_add32(&active_transactions, 1); + + /* + we need a mutex here to ensure that + transactions in the active list are ordered by the trid. + So, incrementing global_trid_generator and + adding to the list must be atomic. + + and as we have a mutex, we can as well do everything + under it - allocating a TRX, incrementing active_transactions, + setting trx->min_read_from. + + Note that all the above is fast. generating short_id may be slow, + as it involves scanning a big array - so it's still done + outside of the mutex. + */ + + pthread_mutex_lock(&LOCK_trx_list); + trx=pool; + while (trx && !my_atomic_casptr((void **)&pool, (void **)&trx, trx->next)) + /* no-op */; + + if (!trx) + { + trx=(TRX *)my_malloc(sizeof(TRX), MYF(MY_WME)); + global_malloc++; + } + if (!trx) + return 0; + + trx->min_read_from= active_list_min.next->trid; + + trx->trid= new_trid(); + trx->short_id= 0; + + trx->next= &active_list_max; + trx->prev= active_list_max.prev; + active_list_max.prev= trx->prev->next= trx; + pthread_mutex_unlock(&LOCK_trx_list); + + trx->pins=lf_hash_get_pins(&trid_to_trx); + + if (!trx->min_read_from) + trx->min_read_from= trx->trid; + + trx->commit_trid=0; + + set_short_id(trx); /* this must be the last! */ + + + return trx; +} + +/* + remove a trx from the active list, + move to committed list, + set commit_trid + + TODO + integrate with lock manager, log manager. That means: + a common "commit" mutex - forcing the log and setting commit_trid + must be done atomically (QQ how the heck it could be done with + group commit ???) + + trid_to_trx, active_list_*, and committed_list_* can be + updated asyncronously. +*/ +void trxman_end_trx(TRX *trx, my_bool commit) +{ + int res; + TRX *free_me= 0; + LF_PINS *pins= trx->pins; + + pthread_mutex_lock(&LOCK_trx_list); + trx->next->prev= trx->prev; + trx->prev->next= trx->next; + + if (trx->prev == &active_list_min) + { + TRX *t; + for (t= committed_list_min.next; + t->commit_trid < active_list_min.next->min_read_from; + t= t->next) /* no-op */; + + if (t != committed_list_min.next) + { + free_me= committed_list_min.next; + committed_list_min.next= t; + t->prev->next=0; + t->prev= &committed_list_min; + } + } + + my_atomic_rwlock_wrlock(&LOCK_short_id_to_trx); + my_atomic_storeptr((void **)&short_id_to_trx[trx->short_id], 0); + my_atomic_rwlock_wrunlock(&LOCK_short_id_to_trx); + + if (commit && active_list_min.next != &active_list_max) + { + trx->commit_trid= global_trid_generator; + + trx->next= &committed_list_max; + trx->prev= committed_list_max.prev; + committed_list_max.prev= trx->prev->next= trx; + + res= lf_hash_insert(&trid_to_trx, pins, &trx); + DBUG_ASSERT(res == 0); + } + else + { + trx->next=free_me; + free_me=trx; + } + pthread_mutex_unlock(&LOCK_trx_list); + + my_atomic_add32(&active_transactions, -1); + + while (free_me) + { + int res; + TRX *t= free_me; + free_me= free_me->next; + + res= lf_hash_delete(&trid_to_trx, pins, &t->trid, sizeof(TrID)); + + trxman_free_trx(t); + } + + lf_hash_put_pins(pins); +} + +/* free a trx (add to the pool, that is */ +void trxman_free_trx(TRX *trx) +{ + TRX *tmp=pool; + + do + { + trx->next=tmp; + } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trx)); +} + +my_bool trx_can_read_from(TRX *trx, TrID trid) +{ + TRX *found; + my_bool can; + + if (trid < trx->min_read_from) + return TRUE; + if (trid > trx->trid) + return FALSE; + + found= lf_hash_search(&trid_to_trx, trx->pins, &trid, sizeof(trid)); + if (!found) + return FALSE; /* not in the hash = cannot read */ + + can= found->commit_trid < trx->trid; + lf_unpin(trx->pins, 2); + return can; +} + diff --git a/storage/maria/trxman.h b/storage/maria/trxman.h new file mode 100644 index 00000000000..ae794470a47 --- /dev/null +++ b/storage/maria/trxman.h @@ -0,0 +1,28 @@ + +typedef uint64 TrID; /* our TrID is 6 bytes */ + +typedef struct st_transaction +{ + TrID trid, min_read_from, commit_trid; + struct st_transaction *next, *prev; + /* Note! if short_id is 0, trx is NOT initialized */ + uint16 short_id; + LF_PINS *pins; +} TRX; + +#define SHORT_ID_MAX 65535 + +extern uint active_transactions; + +extern TRX **short_id_to_trx; +extern my_atomic_rwlock_t LOCK_short_id_to_trx; + +int trxman_init(); +int trxman_end(); +TRX *trxman_new_trx(); +void trxman_end_trx(TRX *trx, my_bool commit); +#define trxman_commit_trx(T) trxman_end_trx(T, TRUE) +#define trxman_abort_trx(T) trxman_end_trx(T, FALSE) +void trxman_free_trx(TRX *trx); +my_bool trx_can_read_from(TRX *trx, TrID trid); + diff --git a/unittest/Makefile.am b/unittest/Makefile.am index ca3291efde0..295ecd7186f 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -1,10 +1,10 @@ -SUBDIRS = mytap . mysys examples +SUBDIRS = mytap mysys maria examples noinst_SCRIPTS = unit EXTRA_DIST = unit.pl CLEANFILES = unit -unittests = mytap mysys +unittests = mytap mysys maria test: unit ./unit run $(unittests) diff --git a/unittest/maria/Makefile.am b/unittest/maria/Makefile.am new file mode 100644 index 00000000000..667d1e09a07 --- /dev/null +++ b/unittest/maria/Makefile.am @@ -0,0 +1,12 @@ + +AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include +AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap + +LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ + $(top_builddir)/storage/maria/libmaria.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a + +noinst_PROGRAMS = trxman-t + diff --git a/unittest/maria/trxman-t.c b/unittest/maria/trxman-t.c new file mode 100644 index 00000000000..a29bf5abc8e --- /dev/null +++ b/unittest/maria/trxman-t.c @@ -0,0 +1,138 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + +#include +#include +#include +#include +#include "../../storage/maria/trxman.h" + +pthread_attr_t rt_attr; +pthread_mutex_t rt_mutex; +pthread_cond_t rt_cond; +int rt_num_threads; + +int litmus; + +/* template for a test: the goal is to have litmus==0 if the test passed + +#define ITER nnn +pthread_handler_t test_XXXXXXXX(void *arg) +{ + int m=(*(int *)arg)/ITER, x; + + for (x=((int)(intptr)(&m)); m ; m--) + { + // do something with litmus + } + // do something more with litmus + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + if (!rt_num_threads) + { + diag("whatever diagnostics we want", blabla, foobar); + pthread_cond_signal(&rt_cond); + } + pthread_mutex_unlock(&rt_mutex); + return 0; +} +#undef ITER + +*/ + +/* + create and end (commit or rollback) transactions randomly +*/ +#define MAX_ITER 100 +pthread_handler_t test_trxman(void *arg) +{ + int m=(*(int *)arg); + uint x, y, i, j, n; + TRX *trx[MAX_ITER]; + + for (x=((int)(intptr)(&m)); m > 0; ) + { + y= x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + m-= n= x % MAX_ITER; + for (i=0; i < n; i++) + trx[i]=trxman_new_trx(); + for (i=0; i < n; i++) + { + y=(y*19 + 7) % 31; + trxman_end_trx(trx[i], y & 1); + } + } + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + if (!rt_num_threads) + pthread_cond_signal(&rt_cond); + pthread_mutex_unlock(&rt_mutex); + return 0; +} +#undef MAX_ITER + +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t t; + ulonglong now=my_getsystime(); + + litmus= 0; + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + for (rt_num_threads=n ; n ; n--) + pthread_create(&t, &rt_attr, handler, &m); + pthread_mutex_lock(&rt_mutex); + while (rt_num_threads) + pthread_cond_wait(&rt_cond, &rt_mutex); + pthread_mutex_unlock(&rt_mutex); + now=my_getsystime()-now; + ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); +} + +int global_malloc=0; +int main() +{ + plan(1); + + if (my_atomic_initialize()) + return exit_status(); + + my_init(); + + pthread_attr_init(&rt_attr); + pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); + pthread_mutex_init(&rt_mutex, 0); + pthread_cond_init(&rt_cond, 0); + +#define CYCLES 10000 +#define THREADS 10 + + trxman_init(); + run_test("trxman", test_trxman, THREADS,CYCLES); + trxman_destroy(); + diag("mallocs: %d\n", global_malloc); + + pthread_mutex_destroy(&rt_mutex); + pthread_cond_destroy(&rt_cond); + pthread_attr_destroy(&rt_attr); + my_end(0); + return exit_status(); +} + diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 1e7206441eb..8962131d45a 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -14,9 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -//#define MY_ATOMIC_MODE_RWLOCKS -//#define MY_ATOMIC_MODE_DUMMY - #include #include @@ -41,7 +38,7 @@ pthread_handler_t test_atomic_add_handler(void *arg) int32 x; for (x=((int)(intptr)(&m)); m ; m--) { - x=x*m+0x87654321; + x=(x*m+0x87654321) & INT_MAX32; my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, x); my_atomic_rwlock_wrunlock(&rwl); @@ -111,7 +108,7 @@ pthread_handler_t test_atomic_cas_handler(void *arg) my_atomic_rwlock_wrlock(&rwl); y=my_atomic_load32(&a32); my_atomic_rwlock_wrunlock(&rwl); - x=x*m+0x87654321; + x=(x*m+0x87654321) & INT_MAX32; do { my_atomic_rwlock_wrlock(&rwl); ok=my_atomic_cas32(&a32, &y, y+x); @@ -171,7 +168,7 @@ pthread_handler_t test_lf_alloc(void *arg) for (x=((int)(intptr)(&m)); m ; m--) { TLA *node1, *node2; - x=x*m+0x87654321; + x=(x*m+0x87654321) & INT_MAX32; node1=(TLA *)lf_alloc_new(pins); node1->data=x; y+=node1->data; @@ -217,7 +214,7 @@ pthread_handler_t test_lf_hash(void *arg) y=x; for (i=0; i < N_TLH; i++) { - x=x*(m+i)+0x87654321; + x=(x*(m+i)+0x87654321) & INT_MAX32; z=(x<0) ? -x : x; if (lf_hash_insert(&lf_hash, pins, &z)) { @@ -227,7 +224,7 @@ pthread_handler_t test_lf_hash(void *arg) } for (i=0; i < N_TLH; i++) { - y=y*(m+i)+0x87654321; + y=(y*(m+i)+0x87654321) & INT_MAX32; z=(y<0) ? -y : y; if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z))) sum-=z; @@ -307,8 +304,8 @@ int main() test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES); - lf_hash_end(&lf_hash); - lf_alloc_end(&lf_allocator); + lf_hash_destroy(&lf_hash); + lf_alloc_destroy(&lf_allocator); pthread_mutex_destroy(&mutex); pthread_cond_destroy(&cond); pthread_attr_destroy(&thr_attr); From cd40855e9d8ee3a70f53aa061a9e303eec38a52d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 Aug 2006 15:22:54 +0200 Subject: [PATCH 19/95] lf_hash: only data nodes use lf_alloc now, dummy nodes are malloc'ed directly mysys/lf_hash.c: only data nodes use lf_alloc now, dummy nodes are malloc'ed directly --- mysys/lf_hash.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 57936ea7b1f..736c3ea4887 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -18,7 +18,6 @@ extensible hash TODO - dummy nodes use element_size=0 try to get rid of dummy nodes ? */ #include @@ -241,7 +240,10 @@ void lf_hash_destroy(LF_HASH *hash) while (el) { intptr next=el->link; - lf_alloc_real_free(&hash->alloc, el); + if (el->hashnr & 1) + lf_alloc_real_free(&hash->alloc, el); + else + my_free((void *)el, MYF(0)); el=(LF_SLIST *)next; } lf_alloc_destroy(&hash->alloc); @@ -263,7 +265,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) lf_lock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); - node->key= hash_key(hash, node+1, &node->keylen); + node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, node->keylen); bucket= hashnr % hash->size; el=_lf_dynarray_lvalue(&hash->array, bucket); @@ -329,17 +331,20 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) return found+1; } +static char *dummy_key=""; + static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) { uint parent= my_clear_highest_bit(bucket); - LF_SLIST *dummy=_lf_alloc_new(pins), **tmp=0, *cur; + LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp=0, *cur; LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); dummy->hashnr=my_reverse_bits(bucket); - LINT_INIT(dummy->key); - LINT_INIT(dummy->keylen); + dummy->key=dummy_key; + dummy->keylen=0; if ((cur= linsert(el, dummy, pins, 0))) { _lf_alloc_free(pins, dummy); From d83bc171b6775281d556604b47d6d13a8bf5cecf Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 23 Aug 2006 17:27:21 +0200 Subject: [PATCH 20/95] renamed global variables --- storage/maria/trxman.c | 23 ++++++++++------------- storage/maria/trxman.h | 2 +- unittest/maria/trxman-t.c | 3 +-- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/storage/maria/trxman.c b/storage/maria/trxman.c index 3c64d93183a..a3e746af9ca 100644 --- a/storage/maria/trxman.c +++ b/storage/maria/trxman.c @@ -8,7 +8,7 @@ TRX active_list_min, active_list_max, committed_list_min, committed_list_max, *pool; pthread_mutex_t LOCK_trx_list; -uint active_transactions; +uint trxman_active_transactions, trxman_allocated_transactions; TrID global_trid_generator; TRX **short_id_to_trx; @@ -30,7 +30,8 @@ int trxman_init() active_list_max.next= active_list_min.prev= 0; active_list_max.prev= &active_list_min; active_list_min.next= &active_list_max; - active_transactions= 0; + trxman_active_transactions= 0; + trxman_allocated_transactions= 0; committed_list_max.commit_trid= ~0; committed_list_max.next= committed_list_min.prev= 0; @@ -54,7 +55,7 @@ int trxman_init() int trxman_destroy() { DBUG_ASSERT(trid_to_trx.count == 0); - DBUG_ASSERT(active_transactions == 0); + DBUG_ASSERT(trxman_active_transactions == 0); DBUG_ASSERT(active_list_max.prev == &active_list_min); DBUG_ASSERT(active_list_min.next == &active_list_max); DBUG_ASSERT(committed_list_max.prev == &committed_list_min); @@ -62,7 +63,7 @@ int trxman_destroy() while (pool) { TRX *tmp=pool->next; - my_free(pool, MYF(0)); + my_free((void *)pool, MYF(0)); pool=tmp; } lf_hash_destroy(&trid_to_trx); @@ -93,21 +94,17 @@ static void set_short_id(TRX *trx) trx->short_id= i; } -extern int global_malloc; TRX *trxman_new_trx() { TRX *trx; - my_atomic_add32(&active_transactions, 1); + my_atomic_add32(&trxman_active_transactions, 1); /* - we need a mutex here to ensure that - transactions in the active list are ordered by the trid. - So, incrementing global_trid_generator and - adding to the list must be atomic. + see trxman_end_trx to see why we need a mutex here and as we have a mutex, we can as well do everything - under it - allocating a TRX, incrementing active_transactions, + under it - allocating a TRX, incrementing trxman_active_transactions, setting trx->min_read_from. Note that all the above is fast. generating short_id may be slow, @@ -123,7 +120,7 @@ TRX *trxman_new_trx() if (!trx) { trx=(TRX *)my_malloc(sizeof(TRX), MYF(MY_WME)); - global_malloc++; + trxman_allocated_transactions++; } if (!trx) return 0; @@ -213,7 +210,7 @@ void trxman_end_trx(TRX *trx, my_bool commit) } pthread_mutex_unlock(&LOCK_trx_list); - my_atomic_add32(&active_transactions, -1); + my_atomic_add32(&trxman_active_transactions, -1); while (free_me) { diff --git a/storage/maria/trxman.h b/storage/maria/trxman.h index ae794470a47..5ac989d03a4 100644 --- a/storage/maria/trxman.h +++ b/storage/maria/trxman.h @@ -12,7 +12,7 @@ typedef struct st_transaction #define SHORT_ID_MAX 65535 -extern uint active_transactions; +extern uint trxman_active_transactions, trxman_allocated_transactions; extern TRX **short_id_to_trx; extern my_atomic_rwlock_t LOCK_short_id_to_trx; diff --git a/unittest/maria/trxman-t.c b/unittest/maria/trxman-t.c index a29bf5abc8e..7536e5534bf 100644 --- a/unittest/maria/trxman-t.c +++ b/unittest/maria/trxman-t.c @@ -106,7 +106,6 @@ void run_test(const char *test, pthread_handler handler, int n, int m) ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); } -int global_malloc=0; int main() { plan(1); @@ -127,7 +126,7 @@ int main() trxman_init(); run_test("trxman", test_trxman, THREADS,CYCLES); trxman_destroy(); - diag("mallocs: %d\n", global_malloc); + diag("mallocs: %d\n", trxman_allocated_transactions); pthread_mutex_destroy(&rt_mutex); pthread_cond_destroy(&rt_cond); From 87aa4ae2d35d0291bbfc47ece3f19cfa8746a573 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Aug 2006 11:30:35 +0200 Subject: [PATCH 21/95] just comment changes and line formatting changes. storage/maria/checkpoint.c: comments storage/maria/least_recently_dirtied.c: comments storage/maria/ma_check.c: line formatting changes neglected from my last merge storage/maria/ma_ft_parser.c: it reduces the diff of MyISAM vs Maria :) storage/maria/recovery.c: comments --- storage/maria/checkpoint.c | 8 ++++---- storage/maria/least_recently_dirtied.c | 14 ++++++++++++-- storage/maria/ma_check.c | 17 +++++++++-------- storage/maria/ma_ft_parser.c | 2 +- storage/maria/recovery.c | 12 +----------- 5 files changed, 27 insertions(+), 26 deletions(-) diff --git a/storage/maria/checkpoint.c b/storage/maria/checkpoint.c index 151efe6ad4f..22e7b93d2f4 100644 --- a/storage/maria/checkpoint.c +++ b/storage/maria/checkpoint.c @@ -46,7 +46,8 @@ CHECKPOINT_LEVEL synchronous_checkpoint_in_progress= NONE; /* Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA - ENGINE DO CHECKPOINT"), and probably by maria_panic(). + ENGINE DO CHECKPOINT"), and probably by maria_panic(), and at the end of the + UNDO recovery phase. */ my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) { @@ -342,8 +343,6 @@ log_write_record(...) (requestor does not wait for completion, and does not even later check the result). In real life it will be called by log_write_record(). - which explicitely wants to do checkpoint (ALTER ENGINE CHECKPOINT - checkpoint_level). */ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); { @@ -359,7 +358,8 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS is passed), so it may not be a good idea for each of them to broadcast a cond to wake up the background checkpoint thread. We just don't broacast a cond, the checkpoint thread - will notice our request in max a few seconds. + (see least_recently_dirtied.c) will notice our request in max a few + seconds. */ checkpoint_request= level; /* post request */ } diff --git a/storage/maria/least_recently_dirtied.c b/storage/maria/least_recently_dirtied.c index bca13ca6f1f..c6285fe47cd 100644 --- a/storage/maria/least_recently_dirtied.c +++ b/storage/maria/least_recently_dirtied.c @@ -181,7 +181,15 @@ flush_one_group_from_LRD() */ } -/* flushes all page from LRD up to approximately rec_lsn>=max_lsn */ +/* + Flushes all page from LRD up to approximately rec_lsn>=max_lsn. + This is approximate because we flush groups, and because the LRD list may + not be exactly sorted by rec_lsn (because for a big row, all pages of the + row are inserted into the LRD with rec_lsn being the LSN of the REDO for the + first page, so if there are concurrent insertions, the last page of the big + row may have a smaller rec_lsn than the previous pages inserted by + concurrent inserters). +*/ int flush_all_LRD_to_lsn(LSN max_lsn) { lock(global_LRD_mutex); @@ -191,7 +199,9 @@ int flush_all_LRD_to_lsn(LSN max_lsn) { if (flush_one_group_from_LRD()) /* will unlock LRD mutex */ return 1; - /* scheduler may preempt us here so that we don't take full CPU */ + /* + The scheduler may preempt us here as we released the mutex; this is good. + */ lock(global_LRD_mutex); } unlock(global_LRD_mutex); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 69d863e6366..624df8a7881 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -460,10 +460,10 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) auto_increment= ma_retrieve_auto_increment(info, info->rec_buff); if (auto_increment > info->s->state.auto_increment) { - _ma_check_print_warning(param, - "Auto-increment value: %s is smaller than max used value: %s", - llstr(info->s->state.auto_increment,buff2), - llstr(auto_increment, buff)); + _ma_check_print_warning(param, "Auto-increment value: %s is smaller " + "than max used value: %s", + llstr(info->s->state.auto_increment,buff2), + llstr(auto_increment, buff)); } if (param->testflag & T_AUTO_INC) { @@ -481,8 +481,8 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) { /* Don't count this as a real warning, as mariachk can't correct it */ uint save=param->warning_printed; - _ma_check_print_warning(param, - "Found row where the auto_increment column has the value 0"); + _ma_check_print_warning(param, "Found row where the auto_increment " + "column has the value 0"); param->warning_printed=save; } maria_extra(info,HA_EXTRA_NO_KEYREAD,0); @@ -1165,8 +1165,9 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) SEARCH_SAME, info->s->state.key_root[key]); if (search_result) { - _ma_check_print_error(param,"Record at: %10s Can't find key for index: %2d", - llstr(start_recpos,llbuff),key+1); + _ma_check_print_error(param,"Record at: %10s " + "Can't find key for index: %2d", + llstr(start_recpos,llbuff),key+1); if (error++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err2; } diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c index 1c6e0267d53..e5ec97b090d 100644 --- a/storage/maria/ma_ft_parser.c +++ b/storage/maria/ma_ft_parser.c @@ -204,8 +204,8 @@ byte maria_ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, FT_WORD *word, my_bool skip_stopwords) { byte *doc= *start; - int ctype; uint mwc, length, mbl; + int ctype; DBUG_ENTER("maria_ft_simple_get_word"); do diff --git a/storage/maria/recovery.c b/storage/maria/recovery.c index 589e0971686..babf7507ef1 100644 --- a/storage/maria/recovery.c +++ b/storage/maria/recovery.c @@ -221,10 +221,6 @@ int recovery() /* mark that checkpoint requests are now allowed. */ - /* - when all rollback threads have terminated, somebody should print "rollback - finished" to the error log. - */ } pthread_handler_decl rollback_background_thread() @@ -248,13 +244,7 @@ pthread_handler_decl rollback_background_thread() { /* All rollback threads are done. Print "rollback finished" to the error - log. The UNDO phase has the reputation of being a slow operation - (slower than the REDO phase), so taking a checkpoint at the end of it is - intelligent, but as this UNDO phase generates REDOs and CLR_ENDs, if it - did a lot of work then the "automatic checkpoint when much has been - written to the log" will do it; and if the UNDO phase didn't do a lot of - work, no need for a checkpoint. If we change our mind and want to force - a checkpoint at the end of the UNDO phase, simply call it here. + log and take a full checkpoint. */ } unlock_mutex(rollback_threads); From 52191ea4d8dda55173a7c721371faf15bf0ea39c Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Aug 2006 22:10:06 +0200 Subject: [PATCH 22/95] importing Sanja's changes to the control file, with my changes on them. mysys/my_pread.c: print errno in case of error storage/maria/control_file.c: importing Sanja's changes, with my minor changes on them :) storage/maria/control_file.h: importing Sanja's changes, with my minor changes on them :) --- mysys/my_pread.c | 3 +- storage/maria/control_file.c | 154 +++++++++++++++++++++++++++-------- storage/maria/control_file.h | 26 +++++- 3 files changed, 147 insertions(+), 36 deletions(-) diff --git a/mysys/my_pread.c b/mysys/my_pread.c index ac52895efe9..072c18ae7f6 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -155,7 +155,8 @@ uint my_pwrite(int Filedes, const byte *Buffer, uint Count, my_off_t offset, Count-=writenbytes; offset+=writenbytes; } - DBUG_PRINT("error",("Write only %d bytes",writenbytes)); + DBUG_PRINT("error",("Write only %d bytes, error: %d", + writenbytes, my_errno)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) diff --git a/storage/maria/control_file.c b/storage/maria/control_file.c index 897e0b0f0ee..22ea860456a 100644 --- a/storage/maria/control_file.c +++ b/storage/maria/control_file.c @@ -4,74 +4,162 @@ Does not compile yet. */ +#include "maria_def.h" + + /* Here is the implementation of this module */ -/* Control file is 512 bytes (a disk sector), to be as atomic as possible */ +/* should be sector size for atomic write operation */ +#define STAT_FILE_FILENO_SIZE 4 +#define STAT_FILE_FILEOFFSET_SIZE 4 +#define STAT_FILE_LSN_SIZE (STAT_FILE_FILENO_SIZE + STAT_FILE_FILEOFFSET_SIZE) +#define STAT_FILE_MAX_SIZE (STAT_FILE_LSN_SIZE + STAT_FILE_FILENO_SIZE) + + +LSN last_checkpoint_lsn_at_startup; +uint32 last_logno_at_startup; -int control_file_fd; /* + Control file is less then 512 bytes (a disk sector), + to be as atomic as possible +*/ +static int control_file_fd; + + +/* + Initialize control file subsystem + + SYNOPSIS + control_file_create_or_open() + Looks for the control file. If absent, it's a fresh start, create file. If present, read it to find out last checkpoint's LSN and last log. Called at engine's start. + + RETURN + 0 - OK + 1 - Error */ int control_file_create_or_open() { - char buffer[4]; + char buffer[STAT_FILE_MAX_SIZE]; + char name[FN_REFLEN]; + MY_STAT stat_buff; + /* name is concatenation of Maria's home dir and "control" */ - if ((control_file_fd= my_open(name, O_RDWR)) < 0) + if (fn_format(name, "control", maria_data_root, "", MYF(MY_WME)) == NullS) + return 1; + + if ((control_file_fd= my_open(name, + O_CREAT | O_BINARY | /*O_DIRECT |*/ O_RDWR, + MYF(MY_WME))) < 0) + return 1; + + /* + TODO: from "man fsync" on Linux: + "fsync does not necessarily ensure that the entry in the direc- tory + containing the file has also reached disk. For that an explicit + fsync on the file descriptor of the directory is also needed." + So if we just created the file we should sync the directory. + Maybe there should be a flag of my_create() to do this. + */ + + if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) + return 1; + + if (stat_buff.st_size < STAT_FILE_MAX_SIZE) { - /* failure, try to create it */ - if ((control_file_fd= my_create(name, O_RDWR)) < 0) - return 1; /* - So this is a start from scratch, to be safer we should make sure that - there are no logs or data/index files around (indeed it could be that - the control file alone was deleted or not restored, and we should not - go on with life at this point. - For now we trust (this is alpha version), but for beta if would be great - to verify. + File shorter than expected (either we just created it, or a previous run + crashed between creation and first write); do first write. + */ + char buffer[STAT_FILE_MAX_SIZE]; + /* + To be safer we should make sure that there are no logs or data/index + files around (indeed it could be that the control file alone was deleted + or not restored, and we should not go on with life at this point). + + TODO: For now we trust (this is alpha version), but for beta if would + be great to verify. We could have a tool which can rebuild the control file, by reading the directory of logs, finding the newest log, reading it to find last checkpoint... Slow but can save your db. */ - last_checkpoint_lsn_at_startup= 0; - last_log_name_at_startup= NULL; - return 0; + last_checkpoint_lsn_at_startup.file_no= CONTROL_FILE_IMPOSSIBLE_LOGNO; + last_checkpoint_lsn_at_startup.rec_offset= 0; + last_logno_at_startup= CONTROL_FILE_IMPOSSIBLE_LOGNO; + + /* init the file with these "undefined" values */ + return control_file_write_and_force(last_checkpoint_lsn_at_startup, + last_logno_at_startup); } /* Already existing file, read it */ - if (my_read(control_file_fd, buffer, 8, MYF(MY_FNABP))) + if (my_read(control_file_fd, buffer, STAT_FILE_MAX_SIZE, + MYF(MY_FNABP | MY_WME))) return 1; - last_checkpoint_lsn_at_startup= uint8korr(buffer); - if (last_log_name_at_startup= my_malloc(512-8+1)) - return 1; - if (my_read(control_file_fd, last_log_name_at_startup, 512-8), MYF(MY_FNABP)) - return 1; - last_log_name[512-8]= 0; /* end zero to be nice */ + last_checkpoint_lsn_at_startup.file_no= uint4korr(buffer); + last_checkpoint_lsn_at_startup.rec_offset= uint4korr(buffer + + STAT_FILE_FILENO_SIZE); + last_logno_at_startup= uint4korr(buffer + STAT_FILE_LSN_SIZE); return 0; } + /* Write information durably to the control file. + + SYNOPSIS + control_file_write_and_force() + checkpoint_lsn LSN of checkpoint + log_no last log file number + args_to_write bitmap of 1 (write the LSN) and 2 (write the LOGNO) + Called when we have created a new log (after syncing this log's creation) and when we have written a checkpoint (after syncing this log record). + + RETURN + 0 - OK + 1 - Error */ -int control_file_write_and_force(LSN lsn, char *log_name) + +int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, + uint args_to_write) { - char buffer[512]; - uint start=8,end=8; - if (lsn != 0) /* LSN was specified */ + char buffer[STAT_FILE_MAX_SIZE]; + uint start= STAT_FILE_LSN_SIZE, end= STAT_FILE_LSN_SIZE; + /* + If LSN was specified... + + rec_offset can't be 0 in real LSN, because all files have header page + */ + if ((args_to_write & 1) && checkpoint_lsn) /* write checkpoint LSN */ { start= 0; - int8store(buffer, lsn); + int4store(buffer, checkpoint_lsn->file_no); + int4store(buffer + STAT_FILE_FILENO_SIZE, checkpoint_lsn->rec_offset); } - if (log_name != NULL) /* log name was specified */ + if (args_to_write & 2) /* write logno */ { - end= 512; - memcpy(buffer+8, log_name, 512-8); + end= STAT_FILE_MAX_SIZE; + int4store(buffer + STAT_FILE_LSN_SIZE, log_no); } DBUG_ASSERT(start != end); - return (my_pwrite(control_file_fd, buffer, end-start, start, MYF(MY_FNABP)) || - my_sync(control_file_fd)) + return (my_pwrite(control_file_fd, buffer + start, end - start, start, + MYF(MY_FNABP | MY_WME)) || + my_sync(control_file_fd, MYF(MY_WME))); +} + + +/* + Free resources taken by control file subsystem + + SYNOPSIS + control_file_end() +*/ + +void control_file_end() +{ + my_close(control_file_fd, MYF(MY_WME)); } diff --git a/storage/maria/control_file.h b/storage/maria/control_file.h index 522e7565341..add466e6cbd 100644 --- a/storage/maria/control_file.h +++ b/storage/maria/control_file.h @@ -4,10 +4,25 @@ Does not compile yet. */ +#ifndef _control_file_h +#define _control_file_h + +/* indicate absence of the log file number */ +#define CONTROL_FILE_IMPOSSIBLE_LOGNO 0xFFFFFFFF + /* Here is the interface of this module */ +/* + LSN of the last checkoint + (if last_checkpoint_lsn_at_startup.file_no == CONTROL_FILE_IMPOSSIBLE_LOGNO + then there was never a checkpoint) +*/ extern LSN last_checkpoint_lsn_at_startup; -extern char *last_log_name_at_startup; +/* + Last log number at startup time (if last_logno_at_startup == + CONTROL_FILE_IMPOSSIBLE_LOGNO then there is no log file yet) +*/ +extern uint32 last_logno_at_startup; /* Looks for the control file. If absent, it's a fresh start, create file. @@ -21,4 +36,11 @@ int control_file_create_or_open(); Called when we have created a new log (after syncing this log's creation) and when we have written a checkpoint (after syncing this log record). */ -int control_file_write_and_force(LSN lsn, char *log_name); +int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, + uint args_to_write); + + +/* Free resources taken by control file subsystem */ +void control_file_end(); + +#endif From 8a2901a7584a87a326eee67b00ccb9eb8519ea03 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Aug 2006 10:55:27 +0200 Subject: [PATCH 23/95] write a magic string "MACF" (MAria Control File) at the start of the control file. lsn8store, lsn8korr. storage/maria/control_file.h: parameter changes name --- storage/maria/control_file.c | 118 ++++++++++++++++++++++++----------- storage/maria/control_file.h | 2 +- 2 files changed, 83 insertions(+), 37 deletions(-) diff --git a/storage/maria/control_file.c b/storage/maria/control_file.c index 22ea860456a..70eb62c645b 100644 --- a/storage/maria/control_file.c +++ b/storage/maria/control_file.c @@ -9,11 +9,20 @@ /* Here is the implementation of this module */ -/* should be sector size for atomic write operation */ -#define STAT_FILE_FILENO_SIZE 4 -#define STAT_FILE_FILEOFFSET_SIZE 4 -#define STAT_FILE_LSN_SIZE (STAT_FILE_FILENO_SIZE + STAT_FILE_FILEOFFSET_SIZE) -#define STAT_FILE_MAX_SIZE (STAT_FILE_LSN_SIZE + STAT_FILE_FILENO_SIZE) +/* + a control file contains 3 objects: magic string, LSN of last checkpoint, + number of last log. +*/ + +/* total size should be < sector size for atomic write operation */ +#define CONTROL_FILE_MAGIC_STRING "MACF" +#define CONTROL_FILE_MAGIC_STRING_OFFSET 0 +#define CONTROL_FILE_MAGIC_STRING_SIZE sizeof(CONTROL_FILE_MAGIC_STRING) +#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) +#define CONTROL_FILE_LSN_SIZE (4+4) +#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) +#define CONTROL_FILE_FILENO_SIZE 4 +#define CONTROL_FILE_MAX_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) LSN last_checkpoint_lsn_at_startup; @@ -26,6 +35,19 @@ uint32 last_logno_at_startup; */ static int control_file_fd; +static void lsn8store(char *buffer, LSN *lsn) +{ + int4store(buffer, lsn->file_no); + int4store(buffer + CONTROL_FILE_FILENO_SIZE, lsn->rec_offset); +} + +static LSN lsn8korr(char *buffer) +{ + LSN tmp; + tmp.file_no= uint4korr(buffer); + tmp.rec_offset= uint4korr(buffer + CONTROL_FILE_FILENO_SIZE); + return tmp; +} /* Initialize control file subsystem @@ -43,10 +65,18 @@ static int control_file_fd; */ int control_file_create_or_open() { - char buffer[STAT_FILE_MAX_SIZE]; + char buffer[CONTROL_FILE_MAX_SIZE]; char name[FN_REFLEN]; MY_STAT stat_buff; + /* + If you change sizes in the #defines, you at least have to change the + "*store" and "*korr" calls in this file, and can even create backward + compatibility problems. Beware! + */ + DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (4+4)); + DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4); + /* name is concatenation of Maria's home dir and "control" */ if (fn_format(name, "control", maria_data_root, "", MYF(MY_WME)) == NullS) return 1; @@ -68,13 +98,13 @@ int control_file_create_or_open() if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) return 1; - if (stat_buff.st_size < STAT_FILE_MAX_SIZE) + if (stat_buff.st_size < CONTROL_FILE_MAX_SIZE) { /* File shorter than expected (either we just created it, or a previous run crashed between creation and first write); do first write. */ - char buffer[STAT_FILE_MAX_SIZE]; + char buffer[CONTROL_FILE_MAX_SIZE]; /* To be safer we should make sure that there are no logs or data/index files around (indeed it could be that the control file alone was deleted @@ -87,67 +117,83 @@ int control_file_create_or_open() directory of logs, finding the newest log, reading it to find last checkpoint... Slow but can save your db. */ - last_checkpoint_lsn_at_startup.file_no= CONTROL_FILE_IMPOSSIBLE_LOGNO; + last_checkpoint_lsn_at_startup.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; last_checkpoint_lsn_at_startup.rec_offset= 0; - last_logno_at_startup= CONTROL_FILE_IMPOSSIBLE_LOGNO; + last_logno_at_startup= CONTROL_FILE_IMPOSSIBLE_FILENO; /* init the file with these "undefined" values */ return control_file_write_and_force(last_checkpoint_lsn_at_startup, - last_logno_at_startup); + last_logno_at_startup, + CONTROL_FILE_WRITE_ALL); } /* Already existing file, read it */ - if (my_read(control_file_fd, buffer, STAT_FILE_MAX_SIZE, + if (my_read(control_file_fd, buffer, CONTROL_FILE_MAX_SIZE, MYF(MY_FNABP | MY_WME))) return 1; - last_checkpoint_lsn_at_startup.file_no= uint4korr(buffer); - last_checkpoint_lsn_at_startup.rec_offset= uint4korr(buffer + - STAT_FILE_FILENO_SIZE); - last_logno_at_startup= uint4korr(buffer + STAT_FILE_LSN_SIZE); + if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) + return 1; + last_checkpoint_lsn_at_startup= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_logno_at_startup= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); return 0; } +#define CONTROL_FILE_WRITE_ALL 0 /* write all 3 objects */ +#define CONTROL_FILE_WRITE_ONLY_LSN 1 +#define CONTROL_FILE_WRITE_ONLY_LOGNO 2 /* Write information durably to the control file. SYNOPSIS control_file_write_and_force() - checkpoint_lsn LSN of checkpoint + checkpoint_lsn LSN of last checkpoint log_no last log file number - args_to_write bitmap of 1 (write the LSN) and 2 (write the LOGNO) + objs_to_write what we should write Called when we have created a new log (after syncing this log's creation) and when we have written a checkpoint (after syncing this log record). + NOTE + We always want to do one single my_pwrite() here to be as atomic as + possible. + RETURN 0 - OK 1 - Error */ int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, - uint args_to_write) + uint objs_to_write) { - char buffer[STAT_FILE_MAX_SIZE]; - uint start= STAT_FILE_LSN_SIZE, end= STAT_FILE_LSN_SIZE; - /* - If LSN was specified... - - rec_offset can't be 0 in real LSN, because all files have header page - */ - if ((args_to_write & 1) && checkpoint_lsn) /* write checkpoint LSN */ + char buffer[CONTROL_FILE_MAX_SIZE]; + uint start, size; + memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE); + /* write checkpoint LSN */ + if (checkpoint_lsn) + lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); + /* write logno */ + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, log_no); + if (objs_to_write == CONTROL_FILE_WRITE_ALL) { - start= 0; - int4store(buffer, checkpoint_lsn->file_no); - int4store(buffer + STAT_FILE_FILENO_SIZE, checkpoint_lsn->rec_offset); + start= CONTROL_FILE_MAGIC_STRING_OFFSET; + size= CONTROL_FILE_MAX_SIZE; } - if (args_to_write & 2) /* write logno */ + else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LSN) { - end= STAT_FILE_MAX_SIZE; - int4store(buffer + STAT_FILE_LSN_SIZE, log_no); + start= CONTROL_FILE_LSN_OFFSET; + size= CONTROL_FILE_LSN_SIZE; } - DBUG_ASSERT(start != end); - return (my_pwrite(control_file_fd, buffer + start, end - start, start, - MYF(MY_FNABP | MY_WME)) || + else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LOGNO) + { + start= CONTROL_FILE_FILENO_OFFSET; + size= CONTROL_FILE_FILENO_SIZE; + } + else /* incorrect value of objs_to_write */ + DBUG_ASSERT(0); + return (my_pwrite(control_file_fd, buffer + start, size, + start, MYF(MY_FNABP | MY_WME)) || my_sync(control_file_fd, MYF(MY_WME))); } diff --git a/storage/maria/control_file.h b/storage/maria/control_file.h index add466e6cbd..66a1f225cd8 100644 --- a/storage/maria/control_file.h +++ b/storage/maria/control_file.h @@ -37,7 +37,7 @@ int control_file_create_or_open(); and when we have written a checkpoint (after syncing this log record). */ int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, - uint args_to_write); + uint objs_to_write); /* Free resources taken by control file subsystem */ From a1f25544d531b8909e58d36734ee6f65c1e189d5 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 1 Sep 2006 17:53:10 +0200 Subject: [PATCH 24/95] WL#3234 "Maria - control file manager" - fixes to the control file module - unit test for it - renames of all Maria files I created to start with ma_ storage/maria/ma_checkpoint.c: Rename: storage/maria/checkpoint.c -> storage/maria/ma_checkpoint.c storage/maria/ma_checkpoint.h: Rename: storage/maria/checkpoint.h -> storage/maria/ma_checkpoint.h storage/maria/ma_least_recently_dirtied.c: Rename: storage/maria/least_recently_dirtied.c -> storage/maria/ma_least_recently_dirtied.c storage/maria/ma_least_recently_dirtied.h: Rename: storage/maria/least_recently_dirtied.h -> storage/maria/ma_least_recently_dirtied.h storage/maria/ma_recovery.c: Rename: storage/maria/recovery.c -> storage/maria/ma_recovery.c storage/maria/ma_recovery.h: Rename: storage/maria/recovery.h -> storage/maria/ma_recovery.h storage/maria/Makefile.am: control file module and its unit test program storage/maria/ma_control_file.c: DBUG_ tags. Fix for gcc warnings. log_no -> logno (I felt "_no" sounded like a standalone "No" word). ma_ prefix for some functions. last_checkpoint_lsn_at_startup -> last_checkpoint_lsn (no need to make special vars for the values at startup). Same for last_logno. ma_control_file_write_and_force() now updates last_checkpoint_lsn and last_logno, the idea being that they belong to the module, others should not update them. And thus when the module shuts down, it zeroes those vars. storage/maria/ma_control_file.h: importing structs from Sanja to get the control file module to compile; we'll remove that when Sanja pushes the log handler. CONTROL_FILE_IMPOSSIBLE_LOGNO is 0, not FFFFFFFF. storage/maria/ma_control_file_test.c: Unit test program for the Maria control file module. Modelled after other ma_test* files in this directory (so, does not follow the unit test framework recently introduced with libtap; TODO as a task on all ma_test* programs). We test that writing to the control file works, and re-reading from it too, we check (by reading the file by ourselves) that its content on disk is correct, and check that a corrupted control file is detected. --- storage/maria/Makefile.am | 7 +- storage/maria/control_file.h | 46 --- .../maria/{checkpoint.c => ma_checkpoint.c} | 0 .../maria/{checkpoint.h => ma_checkpoint.h} | 0 .../{control_file.c => ma_control_file.c} | 105 ++++--- storage/maria/ma_control_file.h | 75 +++++ storage/maria/ma_control_file_test.c | 290 ++++++++++++++++++ ..._dirtied.c => ma_least_recently_dirtied.c} | 0 ..._dirtied.h => ma_least_recently_dirtied.h} | 0 storage/maria/{recovery.c => ma_recovery.c} | 0 storage/maria/{recovery.h => ma_recovery.h} | 0 11 files changed, 433 insertions(+), 90 deletions(-) delete mode 100644 storage/maria/control_file.h rename storage/maria/{checkpoint.c => ma_checkpoint.c} (100%) rename storage/maria/{checkpoint.h => ma_checkpoint.h} (100%) rename storage/maria/{control_file.c => ma_control_file.c} (66%) create mode 100644 storage/maria/ma_control_file.h create mode 100644 storage/maria/ma_control_file_test.c rename storage/maria/{least_recently_dirtied.c => ma_least_recently_dirtied.c} (100%) rename storage/maria/{least_recently_dirtied.h => ma_least_recently_dirtied.h} (100%) rename storage/maria/{recovery.c => ma_recovery.c} (100%) rename storage/maria/{recovery.h => ma_recovery.h} (100%) diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index d4315b4d446..e2689698d62 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -27,7 +27,7 @@ pkglib_LIBRARIES = libmaria.a bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) maria_pack_DEPENDENCIES=$(LIBRARIES) -noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test +noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test ma_control_file_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test2_DEPENDENCIES= $(LIBRARIES) @@ -53,8 +53,9 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_ft_update.c ma_ft_boolean_search.c \ ma_ft_nlq_search.c ft_maria.c ma_sort.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ - ma_sp_key.c -CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? + ma_sp_key.c \ + ma_control_file.c +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? maria_control DEFS = SUFFIXES = .sh diff --git a/storage/maria/control_file.h b/storage/maria/control_file.h deleted file mode 100644 index 66a1f225cd8..00000000000 --- a/storage/maria/control_file.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - WL#3234 Maria control file - First version written by Guilhem Bichot on 2006-04-27. - Does not compile yet. -*/ - -#ifndef _control_file_h -#define _control_file_h - -/* indicate absence of the log file number */ -#define CONTROL_FILE_IMPOSSIBLE_LOGNO 0xFFFFFFFF - -/* Here is the interface of this module */ - -/* - LSN of the last checkoint - (if last_checkpoint_lsn_at_startup.file_no == CONTROL_FILE_IMPOSSIBLE_LOGNO - then there was never a checkpoint) -*/ -extern LSN last_checkpoint_lsn_at_startup; -/* - Last log number at startup time (if last_logno_at_startup == - CONTROL_FILE_IMPOSSIBLE_LOGNO then there is no log file yet) -*/ -extern uint32 last_logno_at_startup; - -/* - Looks for the control file. If absent, it's a fresh start, create file. - If present, read it to find out last checkpoint's LSN and last log. - Called at engine's start. -*/ -int control_file_create_or_open(); - -/* - Write information durably to the control file. - Called when we have created a new log (after syncing this log's creation) - and when we have written a checkpoint (after syncing this log record). -*/ -int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, - uint objs_to_write); - - -/* Free resources taken by control file subsystem */ -void control_file_end(); - -#endif diff --git a/storage/maria/checkpoint.c b/storage/maria/ma_checkpoint.c similarity index 100% rename from storage/maria/checkpoint.c rename to storage/maria/ma_checkpoint.c diff --git a/storage/maria/checkpoint.h b/storage/maria/ma_checkpoint.h similarity index 100% rename from storage/maria/checkpoint.h rename to storage/maria/ma_checkpoint.h diff --git a/storage/maria/control_file.c b/storage/maria/ma_control_file.c similarity index 66% rename from storage/maria/control_file.c rename to storage/maria/ma_control_file.c index 70eb62c645b..d36e1c04c0c 100644 --- a/storage/maria/control_file.c +++ b/storage/maria/ma_control_file.c @@ -5,7 +5,7 @@ */ #include "maria_def.h" - +#include "ma_control_file.h" /* Here is the implementation of this module */ @@ -17,16 +17,22 @@ /* total size should be < sector size for atomic write operation */ #define CONTROL_FILE_MAGIC_STRING "MACF" #define CONTROL_FILE_MAGIC_STRING_OFFSET 0 -#define CONTROL_FILE_MAGIC_STRING_SIZE sizeof(CONTROL_FILE_MAGIC_STRING) +#define CONTROL_FILE_MAGIC_STRING_SIZE 4 #define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) #define CONTROL_FILE_LSN_SIZE (4+4) #define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) #define CONTROL_FILE_FILENO_SIZE 4 #define CONTROL_FILE_MAX_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) - -LSN last_checkpoint_lsn_at_startup; -uint32 last_logno_at_startup; +/* + This module owns these two vars. + uint32 is always atomically updated, but LSN is 8 bytes, we will need + provisions to ensure that it's updated atomically in + ma_control_file_write_and_force(). Probably the log mutex could be + used. TODO. +*/ +LSN last_checkpoint_lsn; +uint32 last_logno; /* @@ -35,7 +41,7 @@ uint32 last_logno_at_startup; */ static int control_file_fd; -static void lsn8store(char *buffer, LSN *lsn) +static void lsn8store(char *buffer, const LSN *lsn) { int4store(buffer, lsn->file_no); int4store(buffer + CONTROL_FILE_FILENO_SIZE, lsn->rec_offset); @@ -53,21 +59,23 @@ static LSN lsn8korr(char *buffer) Initialize control file subsystem SYNOPSIS - control_file_create_or_open() + ma_control_file_create_or_open() - Looks for the control file. If absent, it's a fresh start, create file. - If present, read it to find out last checkpoint's LSN and last log. + Looks for the control file. If absent, it's a fresh start, creates file. + If present, reads it to find out last checkpoint's LSN and last log, updates + the last_checkpoint_lsn and last_logno global variables. Called at engine's start. RETURN 0 - OK 1 - Error */ -int control_file_create_or_open() +int ma_control_file_create_or_open() { char buffer[CONTROL_FILE_MAX_SIZE]; char name[FN_REFLEN]; MY_STAT stat_buff; + DBUG_ENTER("ma_control_file_create_or_open"); /* If you change sizes in the #defines, you at least have to change the @@ -79,12 +87,12 @@ int control_file_create_or_open() /* name is concatenation of Maria's home dir and "control" */ if (fn_format(name, "control", maria_data_root, "", MYF(MY_WME)) == NullS) - return 1; + DBUG_RETURN(1); if ((control_file_fd= my_open(name, O_CREAT | O_BINARY | /*O_DIRECT |*/ O_RDWR, MYF(MY_WME))) < 0) - return 1; + DBUG_RETURN(1); /* TODO: from "man fsync" on Linux: @@ -96,16 +104,14 @@ int control_file_create_or_open() */ if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) - return 1; + DBUG_RETURN(1); - if (stat_buff.st_size < CONTROL_FILE_MAX_SIZE) + if ((uint)stat_buff.st_size < CONTROL_FILE_MAX_SIZE) { /* File shorter than expected (either we just created it, or a previous run crashed between creation and first write); do first write. - */ - char buffer[CONTROL_FILE_MAX_SIZE]; - /* + To be safer we should make sure that there are no logs or data/index files around (indeed it could be that the control file alone was deleted or not restored, and we should not go on with life at this point). @@ -117,38 +123,41 @@ int control_file_create_or_open() directory of logs, finding the newest log, reading it to find last checkpoint... Slow but can save your db. */ - last_checkpoint_lsn_at_startup.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; - last_checkpoint_lsn_at_startup.rec_offset= 0; - last_logno_at_startup= CONTROL_FILE_IMPOSSIBLE_FILENO; + LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + uint32 imposs_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; /* init the file with these "undefined" values */ - return control_file_write_and_force(last_checkpoint_lsn_at_startup, - last_logno_at_startup, - CONTROL_FILE_WRITE_ALL); + DBUG_RETURN(ma_control_file_write_and_force(&imposs_lsn, imposs_logno, + CONTROL_FILE_WRITE_ALL)); } /* Already existing file, read it */ if (my_read(control_file_fd, buffer, CONTROL_FILE_MAX_SIZE, MYF(MY_FNABP | MY_WME))) - return 1; + DBUG_RETURN(1); if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) - return 1; - last_checkpoint_lsn_at_startup= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET); - last_logno_at_startup= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); - return 0; + { + /* + TODO: what is the good way to report the error? Knowing that this + happens at startup, probably stderr. + */ + DBUG_PRINT("error", ("bad magic string")); + DBUG_RETURN(1); + } + last_checkpoint_lsn= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); + DBUG_RETURN(0); } -#define CONTROL_FILE_WRITE_ALL 0 /* write all 3 objects */ -#define CONTROL_FILE_WRITE_ONLY_LSN 1 -#define CONTROL_FILE_WRITE_ONLY_LOGNO 2 /* - Write information durably to the control file. + Write information durably to the control file; stores this information into + the last_checkpoint_lsn and last_logno global variables. SYNOPSIS - control_file_write_and_force() + ma_control_file_write_and_force() checkpoint_lsn LSN of last checkpoint - log_no last log file number + logno last log file number objs_to_write what we should write Called when we have created a new log (after syncing this log's creation) @@ -163,38 +172,44 @@ int control_file_create_or_open() 1 - Error */ -int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, +int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, uint objs_to_write) { char buffer[CONTROL_FILE_MAX_SIZE]; uint start, size; + DBUG_ENTER("ma_control_file_write_and_force"); + memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE); /* write checkpoint LSN */ if (checkpoint_lsn) lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); /* write logno */ - int4store(buffer + CONTROL_FILE_FILENO_OFFSET, log_no); + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno); if (objs_to_write == CONTROL_FILE_WRITE_ALL) { start= CONTROL_FILE_MAGIC_STRING_OFFSET; size= CONTROL_FILE_MAX_SIZE; + last_checkpoint_lsn= *checkpoint_lsn; + last_logno= logno; } else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LSN) { start= CONTROL_FILE_LSN_OFFSET; size= CONTROL_FILE_LSN_SIZE; + last_checkpoint_lsn= *checkpoint_lsn; } else if (objs_to_write == CONTROL_FILE_WRITE_ONLY_LOGNO) { start= CONTROL_FILE_FILENO_OFFSET; size= CONTROL_FILE_FILENO_SIZE; + last_logno= logno; } else /* incorrect value of objs_to_write */ DBUG_ASSERT(0); - return (my_pwrite(control_file_fd, buffer + start, size, - start, MYF(MY_FNABP | MY_WME)) || - my_sync(control_file_fd, MYF(MY_WME))); + DBUG_RETURN(my_pwrite(control_file_fd, buffer + start, size, + start, MYF(MY_FNABP | MY_WME)) || + my_sync(control_file_fd, MYF(MY_WME))); } @@ -202,10 +217,18 @@ int control_file_write_and_force(LSN *checkpoint_lsn, uint32 log_no, Free resources taken by control file subsystem SYNOPSIS - control_file_end() + ma_control_file_end() */ -void control_file_end() +void ma_control_file_end() { + DBUG_ENTER("ma_control_file_end"); my_close(control_file_fd, MYF(MY_WME)); + /* + As this module owns these variables, closing the module forbids access to + them (just a safety): + */ + last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h new file mode 100644 index 00000000000..d081718b919 --- /dev/null +++ b/storage/maria/ma_control_file.h @@ -0,0 +1,75 @@ +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +#ifndef _control_file_h +#define _control_file_h + +/* + Not everybody needs to call the control file that's why control_file.h is + not in maria_def.h. However, policy or habit may want to change this. +*/ + +#ifndef REMOVE_WHEN_SANJA_PUSHES_LOG_HANDLER +/* + this is to get the control file to compile, until Sanja pushes the log + handler which will supersede those definitions. +*/ +typedef struct st_lsn { + uint32 file_no; + uint32 rec_offset; +} LSN; +#define maria_data_root "." +#endif + +/* + indicate absence of the log file number; first log is always number 1, 0 is + impossible. +*/ +#define CONTROL_FILE_IMPOSSIBLE_FILENO 0 +/* logs always have a header */ +#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 +/* + indicate absence of LSN. +*/ +#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN){CONTROL_FILE_IMPOSSIBLE_FILENO,CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET}) + +/* Here is the interface of this module */ + +/* + LSN of the last checkoint + (if last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO + then there was never a checkpoint) +*/ +extern LSN last_checkpoint_lsn; +/* + Last log number (if last_logno == + CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet) +*/ +extern uint32 last_logno; + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +int ma_control_file_create_or_open(); + +/* + Write information durably to the control file. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). +*/ +#define CONTROL_FILE_WRITE_ALL 0 /* write all 3 objects */ +#define CONTROL_FILE_WRITE_ONLY_LSN 1 +#define CONTROL_FILE_WRITE_ONLY_LOGNO 2 +int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, + uint objs_to_write); + + +/* Free resources taken by control file subsystem */ +void ma_control_file_end(); + +#endif diff --git a/storage/maria/ma_control_file_test.c b/storage/maria/ma_control_file_test.c new file mode 100644 index 00000000000..b3ba27c8e4b --- /dev/null +++ b/storage/maria/ma_control_file_test.c @@ -0,0 +1,290 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Unit test of the control file module of the Maria engine */ + +/* TODO: make it fit the mytap framework */ + +/* + Note that it is not possible to test the durability of the write (can't + pull the plug programmatically :) +*/ + +#include "maria.h" +#include "ma_control_file.h" +#include + +char file_name[FN_REFLEN]; +int fd= -1; + +static void clean_files(); +static void run_test_normal(); +static void run_test_abnormal(); +static void usage(); +static void get_options(int argc, char *argv[]); + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + + get_options(argc,argv); + + clean_files(); + run_test_normal(); + run_test_abnormal(); + + exit(0); /* all ok, if some test failed, we will have aborted */ +} + +/* + Abort unless given expression is non-zero. + + SYNOPSIS + DIE_UNLESS(expr) + + DESCRIPTION + We can't use any kind of system assert as we need to + preserve tested invariants in release builds as well. + + NOTE + This is infamous copy-paste from mysql_client_test.c; + we should instead put it in some include in one single place. +*/ + +#define DIE_UNLESS(expr) \ + ((void) ((expr) ? 0 : (die(__FILE__, __LINE__, #expr), 0))) +#define DIE_IF(expr) \ + ((void) (!(expr) ? 0 : (die(__FILE__, __LINE__, #expr), 0))) +#define DIE(expr) \ + die(__FILE__, __LINE__, #expr) + +void die(const char *file, int line, const char *expr) +{ + fprintf(stderr, "%s:%d: check failed: '%s'\n", file, line, expr); + abort(); +} + + +static void clean_files() +{ + DIE_IF(fn_format(file_name, "control", maria_data_root, "", MYF(MY_WME)) == + NullS); + my_delete(file_name, MYF(0)); /* maybe file does not exist, ignore error */ +} + + +static void run_test_normal() +{ + LSN checkpoint_lsn; + uint32 logno; + uint objs_to_write; + uint i; + char buffer[4]; + + /* TEST0: Instance starts from scratch (control file does not exist) */ + DIE_UNLESS(ma_control_file_create_or_open() == 0); + /* Check that the module reports no information */ + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + + /* TEST1: Simulate creation of one log */ + + objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO; + logno= 123; + DIE_UNLESS(ma_control_file_write_and_force(NULL, logno, + objs_to_write) == 0); + /* Check that last_logno was updated */ + DIE_UNLESS(last_logno == logno); + /* Simulate shutdown */ + ma_control_file_end(); + /* Verify amnesia */ + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + /* And restart */ + DIE_UNLESS(ma_control_file_create_or_open() == 0); + DIE_UNLESS(last_logno == logno); + + /* TEST2: Simulate creation of 5 logs */ + + objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO; + logno= 100; + for (i= 0; i<5; i++) + { + logno*= 3; + DIE_UNLESS(ma_control_file_write_and_force(NULL, logno, + objs_to_write) == 0); + } + ma_control_file_end(); + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + DIE_UNLESS(ma_control_file_create_or_open() == 0); + DIE_UNLESS(last_logno == logno); + + /* + TEST3: Simulate one checkpoint, one log creation, two checkpoints, one + log creation. + */ + + objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN; + checkpoint_lsn= (LSN){5, 10000}; + logno= 10; + DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, + objs_to_write) == 0); + /* check that last_logno was not updated */ + DIE_UNLESS(last_logno != logno); + /* Check that last_checkpoint_lsn was updated */ + DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); + + objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO; + checkpoint_lsn= (LSN){5, 20000}; + logno= 17; + DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, + objs_to_write) == 0); + /* Check that checkpoint LSN was not updated */ + DIE_UNLESS(last_checkpoint_lsn.rec_offset != checkpoint_lsn.rec_offset); + objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN; + checkpoint_lsn= (LSN){17, 20000}; + DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, + objs_to_write) == 0); + objs_to_write= CONTROL_FILE_WRITE_ONLY_LSN; + checkpoint_lsn= (LSN){17, 45000}; + DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, + objs_to_write) == 0); + objs_to_write= CONTROL_FILE_WRITE_ONLY_LOGNO; + logno= 19; + DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, + objs_to_write) == 0); + + ma_control_file_end(); + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + DIE_UNLESS(ma_control_file_create_or_open() == 0); + DIE_UNLESS(last_logno == logno); + DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); + + /* + TEST4: actually check by ourselves the content of the file. + Note that constants (offsets) are hard-coded here, precisely to prevent + someone from changing them in the control file module and breaking + backward-compatibility. + */ + + DIE_IF((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) < 0); + DIE_IF(my_read(fd, buffer, 16, MYF(MY_FNABP | MY_WME)) != 0); + DIE_IF(my_close(fd, MYF(MY_WME)) != 0); + i= uint4korr(buffer+4); + DIE_UNLESS(i == last_checkpoint_lsn.file_no); + i= uint4korr(buffer+8); + DIE_UNLESS(i == last_checkpoint_lsn.rec_offset); + i= uint4korr(buffer+12); + DIE_UNLESS(i == last_logno); + + + /* TEST5: Simulate stop/start/nothing/stop/start */ + + ma_control_file_end(); + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(ma_control_file_create_or_open() == 0); + ma_control_file_end(); + DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + DIE_UNLESS(ma_control_file_create_or_open() == 0); + DIE_UNLESS(last_logno == logno); + DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); + DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); + +} + +static void run_test_abnormal() +{ + /* Corrupt the control file */ + DIE_IF((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) < 0); + DIE_IF(my_write(fd, "papa", 4, MYF(MY_FNABP | MY_WME)) != 0); + DIE_IF(my_close(fd, MYF(MY_WME)) != 0); + + /* Check that control file module sees the problem */ + DIE_IF(ma_control_file_create_or_open() == 0); +} + + +static struct my_option my_long_options[] = +{ +#ifndef DBUG_OFF + {"debug", '#', "Debug log.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static void version() +{ + printf("ma_control_file_test: unit test for the control file " + "module of the Maria storage engine. Ver 1.0 \n"); +} + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'V': + version(); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + version(); + usage(); + exit(0); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c similarity index 100% rename from storage/maria/least_recently_dirtied.c rename to storage/maria/ma_least_recently_dirtied.c diff --git a/storage/maria/least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h similarity index 100% rename from storage/maria/least_recently_dirtied.h rename to storage/maria/ma_least_recently_dirtied.h diff --git a/storage/maria/recovery.c b/storage/maria/ma_recovery.c similarity index 100% rename from storage/maria/recovery.c rename to storage/maria/ma_recovery.c diff --git a/storage/maria/recovery.h b/storage/maria/ma_recovery.h similarity index 100% rename from storage/maria/recovery.h rename to storage/maria/ma_recovery.h From fa793bca310a208242d315b763bb512c252cfa49 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 4 Sep 2006 16:53:09 +0200 Subject: [PATCH 25/95] WL#3234 "Maria - control file manager": added checksum of the file. Now we have size + magic string + checksum to detect that all is ok. Plus misc fixes for "make dist" to work and the resulting tarball to build include/Makefile.am: adding pagecache.h to help the tarball build. The model of pagecache.h, keycache.h, is in pkginclude_HEADERS, wonder why. Adding pagecache.h to noinst_HEADERS for now. storage/maria/Makefile.am: adding ma_control_file.h to help the tarball build storage/maria/ma_control_file.c: adding a simple checksum to the control file. We protect against corruption of this file like this: - test size - test magic string at start - test checksum I also add some simple my_message() errors (to be changed to a better reporting later). storage/maria/ma_control_file.h: comments storage/maria/ma_control_file_test.c: test of wrong checksum in control file storage/maria/CMakeLists.txt: just to make "make dist" happy for now. --- include/Makefile.am | 2 +- storage/maria/CMakeLists.txt | 1 + storage/maria/Makefile.am | 4 +- storage/maria/ma_control_file.c | 191 +++++++++++++++++---------- storage/maria/ma_control_file.h | 6 +- storage/maria/ma_control_file_test.c | 52 +++++--- 6 files changed, 169 insertions(+), 87 deletions(-) create mode 100644 storage/maria/CMakeLists.txt diff --git a/include/Makefile.am b/include/Makefile.am index e81fa3a6520..bf7dcd265be 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -33,7 +33,7 @@ noinst_HEADERS = config-win.h config-netware.h \ mysql_version.h.in my_handler.h my_time.h decimal.h \ my_vle.h my_user.h my_atomic.h atomic/nolock.h \ atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \ - my_libwrap.h + my_libwrap.h pagecache.h # mysql_version.h are generated CLEANFILES = mysql_version.h my_config.h readline openssl diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt new file mode 100644 index 00000000000..cfe23054e2f --- /dev/null +++ b/storage/maria/CMakeLists.txt @@ -0,0 +1 @@ +# empty for the moment; will fill it when we build under Windows diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index e2689698d62..15a5771b5d2 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -28,7 +28,9 @@ bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) maria_pack_DEPENDENCIES=$(LIBRARIES) noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test ma_control_file_test -noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h +noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ + ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h \ + ma_control_file.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test2_DEPENDENCIES= $(LIBRARIES) ma_test3_DEPENDENCIES= $(LIBRARIES) diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index d36e1c04c0c..5861246baf9 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -17,12 +17,14 @@ /* total size should be < sector size for atomic write operation */ #define CONTROL_FILE_MAGIC_STRING "MACF" #define CONTROL_FILE_MAGIC_STRING_OFFSET 0 -#define CONTROL_FILE_MAGIC_STRING_SIZE 4 -#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) +#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1) +#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) +#define CONTROL_FILE_CHECKSUM_SIZE 1 +#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE) #define CONTROL_FILE_LSN_SIZE (4+4) #define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) #define CONTROL_FILE_FILENO_SIZE 4 -#define CONTROL_FILE_MAX_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) +#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) /* This module owns these two vars. @@ -55,6 +57,16 @@ static LSN lsn8korr(char *buffer) return tmp; } +static char simple_checksum(char *buffer, uint size) +{ + /* TODO: improve this sum if we want */ + char s= 0; + uint i; + for (i= 0; i Date: Thu, 7 Sep 2006 11:12:37 +0200 Subject: [PATCH 26/95] Merge of Myisam changes into Maria. First step: ha_maria moves to storage/maria. BitKeeper/deleted/.del-ft_maria.c: Delete: storage/maria/ft_maria.c storage/maria/ha_maria.h: Rename: sql/ha_maria.h -> storage/maria/ha_maria.h storage/maria/ha_maria.cc: Rename: sql/ha_maria.cc -> storage/maria/ha_maria.cc libmysqld/Makefile.am: ha_maria moves to other dir (like myisam has) sql/Makefile.am: ha_maria moves to other dir (like myisam has) sql/mysqld.cc: ha_maria moves to other dir (like myisam has) storage/maria/Makefile.am: I delete ft_maria.c like ft_myisam.c has storage/maria/ma_test_all.sh: -l option is removed (no MyISAM log in Maria), maria_log is removed too. --- libmysqld/Makefile.am | 2 +- sql/Makefile.am | 4 +-- sql/mysqld.cc | 2 +- storage/maria/Makefile.am | 2 +- storage/maria/ft_maria.c | 49 ------------------------------ {sql => storage/maria}/ha_maria.cc | 0 {sql => storage/maria}/ha_maria.h | 0 storage/maria/ma_test_all.sh | 6 ++-- 8 files changed, 7 insertions(+), 58 deletions(-) delete mode 100644 storage/maria/ft_maria.c rename {sql => storage/maria}/ha_maria.cc (100%) rename {sql => storage/maria}/ha_maria.h (100%) diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index 07d8a98362d..befbd3fad3a 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -46,7 +46,7 @@ noinst_HEADERS = embedded_priv.h emb_qcache.h sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ ha_innodb.cc ha_berkeley.cc ha_federated.cc ha_ndbcluster.cc \ - ha_ndbcluster_binlog.cc ha_partition.cc ha_maria.cc\ + ha_ndbcluster_binlog.cc ha_partition.cc \ handler.cc sql_handler.cc \ hostname.cc init.cc password.c \ item.cc item_buff.cc item_cmpfunc.cc item_create.cc \ diff --git a/sql/Makefile.am b/sql/Makefile.am index 0e4621c688e..e453bd6010f 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -52,7 +52,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \ ha_innodb.h ha_berkeley.h ha_federated.h \ ha_ndbcluster.h ha_ndbcluster_binlog.h \ - ha_ndbcluster_tables.h ha_maria.h\ + ha_ndbcluster_tables.h \ opt_range.h protocol.h rpl_tblmap.h \ log.h sql_show.h rpl_rli.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h \ @@ -89,7 +89,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ records.cc filesort.cc handler.cc \ ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ ha_partition.cc ha_innodb.cc ha_berkeley.cc \ - ha_federated.cc ha_maria.cc\ + ha_federated.cc \ ha_ndbcluster.cc ha_ndbcluster_binlog.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a666bec32d7..6465759af77 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -28,7 +28,7 @@ #include "ha_myisam.h" #ifdef WITH_MARIA_STORAGE_ENGINE -#include "ha_maria.h" +#include "../storage/maria/ha_maria.h" #endif #ifdef HAVE_ROW_BASED_REPLICATION diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 15a5771b5d2..0271e29461e 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -53,7 +53,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_delete_table.c ma_rename.c ma_check.c \ ma_keycache.c ma_preload.c ma_ft_parser.c \ ma_ft_update.c ma_ft_boolean_search.c \ - ma_ft_nlq_search.c ft_maria.c ma_sort.c \ + ma_ft_nlq_search.c ma_sort.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ ma_sp_key.c \ ma_control_file.c diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c deleted file mode 100644 index 7104c6704ba..00000000000 --- a/storage/maria/ft_maria.c +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ - -/* - This function is for interface functions between fulltext and maria -*/ - -#include "ma_ftdefs.h" - -FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr, - byte *query, uint query_len, CHARSET_INFO *cs, - byte *record) -{ - FT_INFO *res; - if (flags & FT_BOOL) - res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query, - query_len, cs); - else - res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len, - flags, record); - return res; -} - -const struct _ft_vft _ma_ft_vft_nlq = { - maria_ft_nlq_read_next, maria_ft_nlq_find_relevance, - maria_ft_nlq_close_search, maria_ft_nlq_get_relevance, - maria_ft_nlq_reinit_search -}; -const struct _ft_vft _ma_ft_vft_boolean = { - maria_ft_boolean_read_next, maria_ft_boolean_find_relevance, - maria_ft_boolean_close_search, maria_ft_boolean_get_relevance, - maria_ft_boolean_reinit_search -}; - diff --git a/sql/ha_maria.cc b/storage/maria/ha_maria.cc similarity index 100% rename from sql/ha_maria.cc rename to storage/maria/ha_maria.cc diff --git a/sql/ha_maria.h b/storage/maria/ha_maria.h similarity index 100% rename from sql/ha_maria.h rename to storage/maria/ha_maria.h diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index f857127dca9..ec4d1778db9 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -134,10 +134,8 @@ echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" ./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L ./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -L -K -W -P -m50 -l -./maria_log$suffix -./ma_test2$suffix $silent -L -K -W -P -m50 -l -b100 -./maria_log$suffix +./ma_test2$suffix $silent -L -K -W -P -m50 +./ma_test2$suffix $silent -L -K -W -P -m50 -b100 time ./ma_test2$suffix $silent time ./ma_test2$suffix $silent -K -B time ./ma_test2$suffix $silent -L -B From 4c6971b40225446e3f142d485ce6e93766122136 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Sep 2006 17:07:17 +0200 Subject: [PATCH 27/95] Manually merging changes made to MyISAM into Maria. End of merge. storage/maria/ft_maria.c: Rename: BitKeeper/deleted/.del-ft_maria.c -> storage/maria/ft_maria.c configure.in: maria moves to its plug.in storage/maria/Makefile.am: merging changes made to MyISAM into Maria. ft_maria.c is still needed. storage/maria/ha_maria.cc: merging changes made to MyISAM into Maria storage/maria/ma_dynrec.c: merging changes made to MyISAM into Maria storage/maria/ma_extra.c: merging changes made to MyISAM into Maria storage/maria/ma_ft_parser.c: merging changes made to MyISAM into Maria storage/maria/ma_open.c: merging changes made to MyISAM into Maria storage/maria/ma_sort.c: merging changes made to MyISAM into Maria storage/maria/ma_update.c: merging changes made to MyISAM into Maria storage/maria/ma_write.c: merging changes made to MyISAM into Maria storage/maria/maria_def.h: merging changes made to MyISAM into Maria storage/myisam/Makefile.am: merging changes made to MyISAM into Maria storage/maria/plug.in: merging changes made to MyISAM into Maria --- configure.in | 7 -- storage/maria/Makefile.am | 79 ++++++++++++++--- storage/maria/ft_maria.c | 49 +++++++++++ storage/maria/ha_maria.cc | 30 +++++-- storage/maria/ma_dynrec.c | 163 +++++++++++++++++++++++++++++------ storage/maria/ma_extra.c | 5 ++ storage/maria/ma_ft_parser.c | 2 +- storage/maria/ma_open.c | 1 + storage/maria/ma_sort.c | 11 ++- storage/maria/ma_update.c | 13 ++- storage/maria/ma_write.c | 12 +++ storage/maria/maria_def.h | 3 + storage/maria/plug.in | 7 ++ storage/myisam/Makefile.am | 2 +- 14 files changed, 319 insertions(+), 65 deletions(-) create mode 100644 storage/maria/ft_maria.c create mode 100644 storage/maria/plug.in diff --git a/configure.in b/configure.in index bf9a8286d01..f0cdcde1abb 100644 --- a/configure.in +++ b/configure.in @@ -2149,13 +2149,6 @@ MYSQL_STORAGE_ENGINE(partition, partition, [Partition Support], [MySQL Partitioning Support], [max,max-no-ndb]) dnl -- ndbcluster requires partition to be enabled -MYSQL_PLUGIN_DEPENDS(ndbcluster, [partition]) - -MYSQL_STORAGE_ENGINE(maria,maria, [Maria Storage Engine], - [Traditional transactional MySQL tables], [max,max-no-ndb]) -MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) -MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) -MYSQL_PLUGIN_MANDATORY(maria) MYSQL_CONFIGURE_PLUGINS([none]) diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 0271e29461e..f5678a55266 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -14,31 +14,87 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt pkgdata_DATA = ma_test_all ma_test_all.res - -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include -LDADD = @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ - $(top_builddir)/storage/myisam/libmyisam.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ pkglib_LIBRARIES = libmaria.a bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +# For now, it imposes that storage/myisam be built before storage/maria. +maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ maria_pack_DEPENDENCIES=$(LIBRARIES) +maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test ma_control_file_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h \ - ma_control_file.h + ma_control_file.h ha_maria.h ma_test1_DEPENDENCIES= $(LIBRARIES) +ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_test2_DEPENDENCIES= $(LIBRARIES) +ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_test3_DEPENDENCIES= $(LIBRARIES) +ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ #ma_ft_test1_DEPENDENCIES= $(LIBRARIES) #ma_ft_eval_DEPENDENCIES= $(LIBRARIES) maria_ftdump_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_rt_test_DEPENDENCIES= $(LIBRARIES) +ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ ma_sp_test_DEPENDENCIES= $(LIBRARIES) +ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_control_file_test_DEPENDENCIES= $(LIBRARIES) +ma_control_file_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_rnext.c ma_rnext_same.c \ ma_search.c ma_page.c ma_key.c ma_locking.c \ @@ -53,12 +109,11 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_delete_table.c ma_rename.c ma_check.c \ ma_keycache.c ma_preload.c ma_ft_parser.c \ ma_ft_update.c ma_ft_boolean_search.c \ - ma_ft_nlq_search.c ma_sort.c \ + ma_ft_nlq_search.c ft_maria.c ma_sort.c \ + ha_maria.cc \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ - ma_sp_key.c \ - ma_control_file.c + ma_sp_key.c ma_control_file.c CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? maria_control -DEFS = SUFFIXES = .sh diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c new file mode 100644 index 00000000000..7104c6704ba --- /dev/null +++ b/storage/maria/ft_maria.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and maria +*/ + +#include "ma_ftdefs.h" + +FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query, + query_len, cs); + else + res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len, + flags, record); + return res; +} + +const struct _ft_vft _ma_ft_vft_nlq = { + maria_ft_nlq_read_next, maria_ft_nlq_find_relevance, + maria_ft_nlq_close_search, maria_ft_nlq_get_relevance, + maria_ft_nlq_reinit_search +}; +const struct _ft_vft _ma_ft_vft_boolean = { + maria_ft_boolean_read_next, maria_ft_boolean_find_relevance, + maria_ft_boolean_close_search, maria_ft_boolean_get_relevance, + maria_ft_boolean_reinit_search +}; + diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 29718f1493e..a0084ef5e9c 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -19,18 +19,15 @@ #pragma implementation // gcc: Class implementation #endif +#define MYSQL_SERVER 1 #include "mysql_priv.h" +#include #include #include #include "ha_maria.h" -#ifndef MASTER -#include "../srclib/maria/maria_def.h" -#else -#include "../storage/maria/maria_def.h" -#include "../storage/maria/ma_rt_index.h" -#endif -#include +#include "maria_def.h" +#include "ma_rt_index.h" ulong maria_recover_options= HA_RECOVER_NONE; @@ -288,6 +285,15 @@ bool ha_maria::check_if_locking_is_allowed(uint sql_command, table->s->table_name.str); return FALSE; } + + /* + Deny locking of the log tables, which is incompatible with + concurrent insert. Unless called from a logger THD: + general_log_thd or slow_log_thd. + */ + if (!called_by_logger_thread) + return check_if_log_table_locking_is_allowed(sql_command, type, table); + return TRUE; } @@ -486,11 +492,14 @@ int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt) HA_CHECK_OPT tmp_check_opt; char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name.str; + char table_name[FN_REFLEN]; int error; const char *errmsg; DBUG_ENTER("restore"); + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, MARIA_NAME_DEXT)) DBUG_RETURN(HA_ADMIN_INVALID); @@ -526,11 +535,14 @@ int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt) { char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name.str; + char table_name[FN_REFLEN]; int error; const char *errmsg; DBUG_ENTER("ha_maria::backup"); + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, reg_ext)) { diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 047826408c3..253a538861a 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1304,12 +1304,41 @@ void _ma_store_blob_length(byte *pos,uint pack_length,uint length) } - /* Read record from datafile */ - /* Returns 0 if ok, -1 if error */ +/* + Read record from datafile. + + SYNOPSIS + _ma_read_dynamic_record() + info MARIA_HA pointer to table. + filepos From where to read the record. + buf Destination for record. + + NOTE + + If a write buffer is active, it needs to be flushed if its contents + intersects with the record to read. We always check if the position + of the first byte of the write buffer is lower than the position + past the last byte to read. In theory this is also true if the write + buffer is completely below the read segment. That is, if there is no + intersection. But this case is unusual. We flush anyway. Only if the + first byte in the write buffer is above the last byte to read, we do + not flush. + + A dynamic record may need several reads. So this check must be done + before every read. Reading a dynamic record starts with reading the + block header. If the record does not fit into the free space of the + header, the block may be longer than the header. In this case a + second read is necessary. These one or two reads repeat for every + part of the record. + + RETURN + 0 OK + -1 Error +*/ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) { - int flag; + int block_of_record; uint b_type,left_length; byte *to; MARIA_BLOCK_INFO block_info; @@ -1321,20 +1350,19 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) LINT_INIT(to); LINT_INIT(left_length); file=info->dfile; - block_info.next_filepos=filepos; /* for easyer loop */ - flag=block_info.second_read=0; + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; do - { + { + /* A corrupted table can have wrong pointers. (Bug# 19835) */ + if (filepos == HA_OFFSET_ERROR) + goto panic; if (info->opt_flag & WRITE_CACHE_USED && - info->rec_cache.pos_in_file <= block_info.next_filepos && + info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH && flush_io_cache(&info->rec_cache)) goto err; - /* A corrupted table can have wrong pointers. (Bug# 19835) */ - if (block_info.next_filepos == HA_OFFSET_ERROR) - goto panic; info->rec_cache.seek_not_done=1; - if ((b_type= _ma_get_block_info(&block_info,file, - block_info.next_filepos)) + if ((b_type= _ma_get_block_info(&block_info, file, filepos)) & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) { @@ -1342,15 +1370,14 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) my_errno=HA_ERR_RECORD_DELETED; goto err; } - if (flag == 0) /* First block */ + if (block_of_record++ == 0) /* First block */ { - flag=1; if (block_info.rec_len > (uint) info->s->base.max_pack_length) goto panic; if (info->s->base.blobs) { if (!(to=_ma_alloc_rec_buff(info, block_info.rec_len, - &info->rec_buff))) + &info->rec_buff))) goto err; } else @@ -1359,11 +1386,41 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) } if (left_length < block_info.data_len || ! block_info.data_len) goto panic; /* Wrong linked record */ - if (info->s->file_read(info,(byte*) to,block_info.data_len,block_info.filepos, - MYF(MY_NABP))) - goto panic; - left_length-=block_info.data_len; - to+=block_info.data_len; + /* copy information that is already read */ + { + uint offset= (uint) (block_info.filepos - filepos); + uint prefetch_len= (sizeof(block_info.header) - offset); + filepos+= sizeof(block_info.header); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy((byte*) to, block_info.header + offset, prefetch_len); + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; + /* + What a pity that this method is not called 'file_pread' and that + there is no equivalent without seeking. We are at the right + position already. :( + */ + if (info->s->file_read(info, (byte*) to, block_info.data_len, + filepos, MYF(MY_NABP))) + goto panic; + left_length-=block_info.data_len; + to+=block_info.data_len; + } + filepos= block_info.next_filepos; } while (left_length); info->update|= HA_STATE_AKTIV; /* We have a aktive record */ @@ -1520,11 +1577,45 @@ err: } +/* + Read record from datafile. + + SYNOPSIS + _ma_read_rnd_dynamic_record() + info MARIA_HA pointer to table. + buf Destination for record. + filepos From where to read the record. + skip_deleted_blocks If to repeat reading until a non-deleted + record is found. + + NOTE + + If a write buffer is active, it needs to be flushed if its contents + intersects with the record to read. We always check if the position + of the first byte of the write buffer is lower than the position + past the last byte to read. In theory this is also true if the write + buffer is completely below the read segment. That is, if there is no + intersection. But this case is unusual. We flush anyway. Only if the + first byte in the write buffer is above the last byte to read, we do + not flush. + + A dynamic record may need several reads. So this check must be done + before every read. Reading a dynamic record starts with reading the + block header. If the record does not fit into the free space of the + header, the block may be longer than the header. In this case a + second read is necessary. These one or two reads repeat for every + part of the record. + + RETURN + 0 OK + != 0 Error +*/ + int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, register my_off_t filepos, my_bool skip_deleted_blocks) { - int flag,info_read,save_errno; + int block_of_record, info_read, save_errno; uint left_len,b_type; byte *to; MARIA_BLOCK_INFO block_info; @@ -1544,7 +1635,8 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, else info_read=1; /* memory-keyinfoblock is ok */ - flag=block_info.second_read=0; + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; left_len=1; do { @@ -1567,15 +1659,15 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, { if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, sizeof(block_info.header), - (!flag && skip_deleted_blocks ? READING_NEXT : 0) | - READING_HEADER)) + (!block_of_record && skip_deleted_blocks ? + READING_NEXT : 0) | READING_HEADER)) goto panic; b_type= _ma_get_block_info(&block_info,-1,filepos); } else { if (info->opt_flag & WRITE_CACHE_USED && - info->rec_cache.pos_in_file <= filepos && + info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH && flush_io_cache(&info->rec_cache)) DBUG_RETURN(my_errno); info->rec_cache.seek_not_done=1; @@ -1600,7 +1692,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, } goto err; } - if (flag == 0) /* First block */ + if (block_of_record == 0) /* First block */ { if (block_info.rec_len > (uint) share->base.max_pack_length) goto panic; @@ -1642,11 +1734,17 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, { if (_ma_read_cache(&info->rec_cache,(byte*) to,filepos, block_info.data_len, - (!flag && skip_deleted_blocks) ? READING_NEXT :0)) + (!block_of_record && skip_deleted_blocks) ? + READING_NEXT : 0)) goto panic; } else { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < + block_info.filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; /* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */ if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP))) { @@ -1656,7 +1754,11 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, } } } - if (flag++ == 0) + /* + Increment block-of-record counter. If it was the first block, + remember the position behind the block for the next call. + */ + if (block_of_record++ == 0) { info->nextpos=block_info.filepos+block_info.block_len; skip_deleted_blocks=0; @@ -1691,6 +1793,11 @@ uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) if (file >= 0) { + /* + We do not use my_pread() here because we want to have the file + pointer set to the end of the header after this function. + my_pread() may leave the file pointer untouched. + */ VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) != sizeof(info->header)) diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index d600fedb99b..57e540242b9 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -366,6 +366,11 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg pthread_mutex_unlock(&share->intern_lock); #endif break; + case HA_EXTRA_MARK_AS_LOG_TABLE: + pthread_mutex_lock(&share->intern_lock); + share->is_log_table= TRUE; + pthread_mutex_unlock(&share->intern_lock); + break; case HA_EXTRA_KEY_CACHE: case HA_EXTRA_NO_KEY_CACHE: default: diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c index e5ec97b090d..24713c1344f 100644 --- a/storage/maria/ma_ft_parser.c +++ b/storage/maria/ma_ft_parser.c @@ -283,7 +283,7 @@ static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param, static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param, - byte *doc, int doc_len) + char *doc, int doc_len) { byte *end=doc+doc_len; MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 38e71a44f8b..cf21ccb09e5 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -475,6 +475,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->data_file_type = DYNAMIC_RECORD; my_afree((gptr) disk_cache); _ma_setup_functions(share); + share->is_log_table= FALSE; #ifdef THREAD thr_lock_init(&share->lock); VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST)); diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index 5ae23c37261..795bfdb7fda 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -479,12 +479,6 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) if (!got_error) { maria_set_key_active(share->state.key_map, sinfo->key); - if (param->testflag & T_STATISTICS) - maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, - param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? - sinfo->notnull: NULL, - (ulonglong) info->state->records); - if (!sinfo->buffpek.elements) { @@ -497,6 +491,11 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo)) got_error=1; } + if (!got_error && param->testflag & T_STATISTICS) + maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sinfo->notnull: NULL, + (ulonglong) info->state->records); } my_free((gptr) sinfo->sort_keys,MYF(0)); my_free(_ma_get_rec_buff_ptr(info, sinfo->rec_buff), diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 248b17ce2c9..b916b211159 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -171,7 +171,18 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | HA_STATE_AKTIV | key_changed); - VOID(_ma_writeinfo(info,key_changed ? WRITEINFO_UPDATE_KEYFILE : 0)); + + /* + Every Maria function that updates Maria table must end with + call to _ma_writeinfo(). If operation (second param of + _ma_writeinfo()) is not 0 it sets share->changed to 1, that is + flags that data has changed. If operation is 0, this function + equals to no-op in this case. + + ma_update() must always pass !0 value as operation, since even if + there is no index change there could be data change. + */ + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) { diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 24768b36c89..c04a4a51eca 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -158,6 +158,18 @@ int maria_write(MARIA_HA *info, byte *record) (*info->invalidator)(info->filename); info->invalidator=0; } + + /* + Update status of the table. We need to do so after each row write + for the log tables, as we want the new row to become visible to + other threads as soon as possible. We lock mutex here to follow + pthread memory visibility rules. + */ + pthread_mutex_lock(&share->intern_lock); + if (share->is_log_table) + _ma_update_status((void*) info); + pthread_mutex_unlock(&share->intern_lock); + allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(0); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index ecd93807a06..e0ba4bdb406 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -211,6 +211,9 @@ typedef struct st_maria_share uint blocksize; /* blocksize of keyfile */ myf write_flag; enum data_file_type data_file_type; + /* Below flag is needed to make log tables work with concurrent insert */ + my_bool is_log_table; + my_bool changed, /* If changed since lock */ global_changed, /* If changed since open */ not_flushed, temporary, delay_key_write, concurrent_insert; diff --git a/storage/maria/plug.in b/storage/maria/plug.in new file mode 100644 index 00000000000..de74293bd96 --- /dev/null +++ b/storage/maria/plug.in @@ -0,0 +1,7 @@ +MYSQL_STORAGE_ENGINE(maria, no, [Maria Storage Engine], + [Traditional transactional MySQL tables], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) +# Maria will probably go first into max builds, not all builds, +# so we don't declare it mandatory. + diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index e4766bfa05b..fb73635463d 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -99,7 +99,7 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ ft_update.c ft_boolean_search.c ft_nlq_search.c \ - sort.c ha_myisam.cc \ + sort.c ha_myisam.cc ft_myisam.c \ rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? From 46b0384dc9dcacf567e094f590703102f27a9fcf Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Sep 2006 18:40:10 +0200 Subject: [PATCH 28/95] a file to record known Maria bugs until we use a bugs db. BUGS_IN_MARIA.txt: a file to record our bugs until we use a bugs db --- BUGS_IN_MARIA.txt | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 BUGS_IN_MARIA.txt diff --git a/BUGS_IN_MARIA.txt b/BUGS_IN_MARIA.txt new file mode 100644 index 00000000000..e3a2894d399 --- /dev/null +++ b/BUGS_IN_MARIA.txt @@ -0,0 +1,49 @@ +This is our bugs database for now, until we switch to something +better. + +MARIABUG#1 +reported by Guilhem on 2006-09-07 +status: VERIFIED +========================================================== + +ma_test_all shows table corruption messages like this one: + +"error: key delete-link-chain corrupted" in table test2. + +For a simpler testcase extracted from ma_test_all, just run: + +rm -f test2.M*; ./ma_test2; ./maria_chk test2 + +While the similar MyISAM commands don't show any problem. + +You can make MyISAM show this same problem with just this patch: +===== myisam.h 1.80 vs edited ===== +*** /tmp/bk_myisam.h-1.80_O9fSEo 2006-09-07 18:30:28 +02:00 +--- edited/myisam.h 2006-09-07 18:25:54 +02:00 +*************** +*** 52,58 **** + #define MI_MAX_TEMP_LENGTH 2*1024L*1024L*1024L + + /* Possible values for myisam_block_size (must be power of 2) */ +! #define MI_KEY_BLOCK_LENGTH 1024 /* default key block length */ + #define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ + #define MI_MAX_KEY_BLOCK_LENGTH 16384 + +--- 52,58 ---- + #define MI_MAX_TEMP_LENGTH 2*1024L*1024L*1024L + + /* Possible values for myisam_block_size (must be power of 2) */ +! #define MI_KEY_BLOCK_LENGTH 8192 /* default key block length */ + #define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ + #define MI_MAX_KEY_BLOCK_LENGTH 16384 + +(Maria has 8192, and also 32768 for the max_key_block_length define). + +So it's not Maria-specific. + +I noticed that the corruption appears only for "recant>=64" (where +recant is a variable defined in mi_test2.c). + +For MyISAM we don't care as it's using 1024, but for Maria we have a +problem... +===================================================================== From e321f8eb29e8c3baf67cf0f745261dababf36877 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 8 Sep 2006 15:37:02 +0200 Subject: [PATCH 29/95] There is a Maria category now in the bugs db (but only devs can see this category), so no need for this file. BitKeeper/deleted/.del-BUGS_IN_MARIA.txt: Delete: BUGS_IN_MARIA.txt --- BUGS_IN_MARIA.txt | 49 ----------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 BUGS_IN_MARIA.txt diff --git a/BUGS_IN_MARIA.txt b/BUGS_IN_MARIA.txt deleted file mode 100644 index e3a2894d399..00000000000 --- a/BUGS_IN_MARIA.txt +++ /dev/null @@ -1,49 +0,0 @@ -This is our bugs database for now, until we switch to something -better. - -MARIABUG#1 -reported by Guilhem on 2006-09-07 -status: VERIFIED -========================================================== - -ma_test_all shows table corruption messages like this one: - -"error: key delete-link-chain corrupted" in table test2. - -For a simpler testcase extracted from ma_test_all, just run: - -rm -f test2.M*; ./ma_test2; ./maria_chk test2 - -While the similar MyISAM commands don't show any problem. - -You can make MyISAM show this same problem with just this patch: -===== myisam.h 1.80 vs edited ===== -*** /tmp/bk_myisam.h-1.80_O9fSEo 2006-09-07 18:30:28 +02:00 ---- edited/myisam.h 2006-09-07 18:25:54 +02:00 -*************** -*** 52,58 **** - #define MI_MAX_TEMP_LENGTH 2*1024L*1024L*1024L - - /* Possible values for myisam_block_size (must be power of 2) */ -! #define MI_KEY_BLOCK_LENGTH 1024 /* default key block length */ - #define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ - #define MI_MAX_KEY_BLOCK_LENGTH 16384 - ---- 52,58 ---- - #define MI_MAX_TEMP_LENGTH 2*1024L*1024L*1024L - - /* Possible values for myisam_block_size (must be power of 2) */ -! #define MI_KEY_BLOCK_LENGTH 8192 /* default key block length */ - #define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ - #define MI_MAX_KEY_BLOCK_LENGTH 16384 - -(Maria has 8192, and also 32768 for the max_key_block_length define). - -So it's not Maria-specific. - -I noticed that the corruption appears only for "recant>=64" (where -recant is a variable defined in mi_test2.c). - -For MyISAM we don't care as it's using 1024, but for Maria we have a -problem... -===================================================================== From 26fb36067a97b21e12fc2704fd08f9ddfd231ded Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 11 Sep 2006 16:12:31 +0200 Subject: [PATCH 30/95] WL#3234 Maria control file manager. Fitting ma_control_file_test into the mytap unittest framework: new directories: - unittest/storage/ for unit tests of any storage engine - unittest/storage/maria for ... Maria, containing ma_control_file-t. Later, older tests like ma_test*, ma_test_all (but which is Unix dependent in its current form) could move here too. The plugins macro enable building of unittest/storage/X for any enabled engine X which has such a directory. If Falcon wants to have unit tests there too, I may have to merge this patch into 5.x one day. config/ac-macros/plugins.m4: If a storage engine has a directory in unittest/storage, build this directory. configure.in: build storage engines' unit tests. storage/maria/Makefile.am: ma_control_file_test moves to unittest/storage/maria storage/maria/ma_control_file.c: more error codes when opening the control file fails. ma_control_file_end() may now return an error if my_close() failed. storage/maria/ma_control_file.h: more error codes when opening the control file fails. unittest/Makefile.am: adding unit tests for storage engines. Note that unit.pl simply recurses into "storage", so if a unit test for storage engine X has been built previously, and now you re-configure (without making clean) to disable this engine, then the unit test of X will not be rebuilt but will still be present in storage/X, so will be run. unittest/storage/maria/ma_control_file-t.c: Making the test fit the mytap framework (return all the way up the stack instead of assert(); use the mytap functions plan(), ok() etc). Adding test of file too short/long. unittest/storage/maria/Makefile.am: a_control_file-t is added to the Maria unit tests. Later, older tests (ma_test1 etc) could also move here. unittest/storage/Makefile.am: New BitKeeper file ``unittest/storage/Makefile.am'' --- config/ac-macros/plugins.m4 | 20 +- configure.in | 1 + storage/maria/Makefile.am | 10 +- storage/maria/ma_control_file.c | 79 ++-- storage/maria/ma_control_file.h | 15 +- storage/maria/ma_control_file_test.c | 312 -------------- unittest/Makefile.am | 4 +- unittest/storage/Makefile.am | 27 ++ unittest/storage/maria/Makefile.am | 29 ++ unittest/storage/maria/ma_control_file-t.c | 448 +++++++++++++++++++++ 10 files changed, 593 insertions(+), 352 deletions(-) delete mode 100644 storage/maria/ma_control_file_test.c create mode 100644 unittest/storage/Makefile.am create mode 100644 unittest/storage/maria/Makefile.am create mode 100644 unittest/storage/maria/ma_control_file-t.c diff --git a/config/ac-macros/plugins.m4 b/config/ac-macros/plugins.m4 index cfc5f8dbcbe..791405f8fbd 100644 --- a/config/ac-macros/plugins.m4 +++ b/config/ac-macros/plugins.m4 @@ -280,6 +280,7 @@ AC_DEFUN([MYSQL_CONFIGURE_PLUGINS],[ _MYSQL_EMIT_PLUGIN_ACTIONS(m4_bpatsubst(__mysql_plugin_list__, :, [,])) AC_SUBST([mysql_se_dirs]) AC_SUBST([mysql_pg_dirs]) + AC_SUBST([mysql_se_unittest_dirs]) ]) ]) ]) @@ -315,6 +316,7 @@ AC_DEFUN([__MYSQL_EMIT_CHECK_PLUGIN],[ ]) AC_MSG_CHECKING([whether to use ]$3) mysql_use_plugin_dir="" + mysql_use_plugin_unittest_dir="" m4_ifdef([$10],[ if test "X[$mysql_plugin_]$2" = Xyes -a \ "X[$with_plugin_]$2" != Xno -o \ @@ -407,10 +409,24 @@ dnl Although this is "pretty", it breaks libmysqld build m4_syscmd(test -f "$6/configure") ifelse(m4_sysval, 0, [AC_CONFIG_SUBDIRS($6)], - [AC_CONFIG_FILES($6/Makefile)] + [ + AC_CONFIG_FILES($6/Makefile) + m4_syscmd(test -d "unittest/$6") + ifelse(m4_sysval, 0, + [ + mysql_use_plugin_unittest_dir="$6" + AC_CONFIG_FILES(unittest/$6/Makefile) + ], []) + ] ) ifelse(m4_substr($6, 0, 8), [storage/], - [mysql_se_dirs="$mysql_se_dirs ]m4_substr($6, 8)", + [ + [mysql_se_name="]m4_substr($6, 8)" + mysql_se_dirs="$mysql_se_dirs $mysql_se_name" + if test -n "$mysql_use_plugin_unittest_dir" ; then + mysql_se_unittest_dirs="$mysql_se_unitest_dirs $mysql_se_name" + fi + ], m4_substr($6, 0, 7), [plugin/], [mysql_pg_dirs="$mysql_pg_dirs ]m4_substr($6, 7)", [AC_FATAL([don't know how to handle plugin dir ]$6)]) diff --git a/configure.in b/configure.in index f0cdcde1abb..61c2deb5b5e 100644 --- a/configure.in +++ b/configure.in @@ -2482,6 +2482,7 @@ AC_SUBST(MAKE_BINARY_DISTRIBUTION_OPTIONS) AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl unittest/mysys/Makefile unittest/examples/Makefile dnl + unittest/storage/Makefile dnl strings/Makefile regex/Makefile storage/Makefile dnl man/Makefile BUILD/Makefile vio/Makefile dnl libmysql/Makefile client/Makefile dnl diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index f5678a55266..10204226742 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -47,7 +47,7 @@ maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test ma_control_file_test +noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h \ ma_control_file.h ha_maria.h @@ -89,12 +89,6 @@ ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -ma_control_file_test_DEPENDENCIES= $(LIBRARIES) -ma_control_file_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ - $(top_builddir)/storage/myisam/libmyisam.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_rnext.c ma_rnext_same.c \ ma_search.c ma_page.c ma_key.c ma_locking.c \ @@ -113,7 +107,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ha_maria.cc \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ ma_sp_key.c ma_control_file.c -CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? maria_control +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? SUFFIXES = .sh diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5861246baf9..5fbb0a084df 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -41,7 +41,7 @@ uint32 last_logno; Control file is less then 512 bytes (a disk sector), to be as atomic as possible */ -static int control_file_fd; +static int control_file_fd= -1; static void lsn8store(char *buffer, const LSN *lsn) { @@ -87,15 +87,16 @@ static char simple_checksum(char *buffer, uint size) RETURN 0 - OK - 1 - Error + 1 - Error (in which case the file is left closed) */ -int ma_control_file_create_or_open() +CONTROL_FILE_ERROR ma_control_file_create_or_open() { char buffer[CONTROL_FILE_SIZE]; char name[FN_REFLEN]; MY_STAT stat_buff; my_bool create_file; int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR; + int error= CONTROL_FILE_UNKNOWN_ERROR; DBUG_ENTER("ma_control_file_create_or_open"); /* @@ -106,16 +107,19 @@ int ma_control_file_create_or_open() DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (4+4)); DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4); - /* name is concatenation of Maria's home dir and "control" */ - if (fn_format(name, "control", maria_data_root, "", MYF(MY_WME)) == NullS) - DBUG_RETURN(1); + if (control_file_fd >= 0) /* already open */ + DBUG_RETURN(0); + + if (fn_format(name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) == NullS) + DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); create_file= test(my_access(name,F_OK)); if (create_file) { if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0) - DBUG_RETURN(1); + DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); /* TODO: from "man fsync" on Linux: "fsync does not necessarily ensure that the entry in the directory @@ -127,10 +131,10 @@ int ma_control_file_create_or_open() To be safer we should make sure that there are no logs or data/index files around (indeed it could be that the control file alone was deleted or not restored, and we should not go on with life at this point). - + TODO: For now we trust (this is alpha version), but for beta if would be great to verify. - + We could have a tool which can rebuild the control file, by reading the directory of logs, finding the newest log, reading it to find last checkpoint... Slow but can save your db. @@ -138,7 +142,7 @@ int ma_control_file_create_or_open() LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; uint32 imposs_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; - + /* init the file with these "undefined" values */ DBUG_RETURN(ma_control_file_write_and_force(&imposs_lsn, imposs_logno, CONTROL_FILE_UPDATE_ALL)); @@ -147,12 +151,12 @@ int ma_control_file_create_or_open() /* Otherwise, file exists */ if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0) - DBUG_RETURN(1); - - if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) - DBUG_RETURN(1); + goto err; - if ((uint)stat_buff.st_size != CONTROL_FILE_SIZE) + if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) + goto err; + + if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE) { /* Given that normally we write only a sector and it's atomic, the only @@ -165,31 +169,43 @@ int ma_control_file_create_or_open() disk/filesystem has a problem. So let's be rigid. */ - my_message(0, "wrong file size", MYF(0)); /* TODO: improve errors */ - my_error(HA_ERR_CRASHED, MYF(0), name); - DBUG_RETURN(1); + my_message(0, "too small file", MYF(0)); /* TODO: improve errors */ + error= CONTROL_FILE_TOO_SMALL; + goto err; + } + + if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE) + { + my_message(0, "too big file", MYF(0)); /* TODO: improve errors */ + error= CONTROL_FILE_TOO_BIG; + goto err; } if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE, MYF(MY_FNABP | MY_WME))) - DBUG_RETURN(1); + goto err; if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) { my_message(0, "bad magic string", MYF(0)); - DBUG_RETURN(1); + error= CONTROL_FILE_BAD_MAGIC_STRING; + goto err; } if (simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET, CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) != buffer[CONTROL_FILE_CHECKSUM_OFFSET]) { my_message(0, "checksum mismatch", MYF(0)); - DBUG_RETURN(1); + error= CONTROL_FILE_BAD_CHECKSUM; + goto err; } last_checkpoint_lsn= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET); last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); DBUG_RETURN(0); +err: + ma_control_file_end(); + DBUG_RETURN(error); } @@ -227,6 +243,8 @@ int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE; DBUG_ENTER("ma_control_file_write_and_force"); + DBUG_ASSERT(control_file_fd >= 0); /* must be open */ + memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE); @@ -259,7 +277,7 @@ int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, 0, MYF(MY_FNABP | MY_WME)) || my_sync(control_file_fd, MYF(MY_WME))) DBUG_RETURN(1); - + /* TODO: you need some protection to be able to write last_* global vars */ if (update_checkpoint_lsn) last_checkpoint_lsn= *checkpoint_lsn; @@ -277,15 +295,26 @@ int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, ma_control_file_end() */ -void ma_control_file_end() +int ma_control_file_end() { + int close_error; DBUG_ENTER("ma_control_file_end"); - my_close(control_file_fd, MYF(MY_WME)); + + if (control_file_fd < 0) /* already closed */ + DBUG_RETURN(0); + + close_error= my_close(control_file_fd, MYF(MY_WME)); + /* + As my_close() frees structures even if close() fails, we do the same, + i.e. we mark the file as closed in all cases. + */ + control_file_fd= -1; /* As this module owns these variables, closing the module forbids access to them (just a safety): */ last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; - DBUG_VOID_RETURN; + + DBUG_RETURN(close_error); } diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 5f5581137b7..5ac6f158183 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -24,6 +24,7 @@ typedef struct st_lsn { #define maria_data_root "." #endif +#define CONTROL_FILE_BASE_NAME "maria_control" /* indicate absence of the log file number; first log is always number 1, 0 is impossible. @@ -55,7 +56,15 @@ extern uint32 last_logno; If present, read it to find out last checkpoint's LSN and last log. Called at engine's start. */ -int ma_control_file_create_or_open(); +typedef enum enum_control_file_error { + CONTROL_FILE_OK= 0, + CONTROL_FILE_TOO_SMALL, + CONTROL_FILE_TOO_BIG, + CONTROL_FILE_BAD_MAGIC_STRING, + CONTROL_FILE_BAD_CHECKSUM, + CONTROL_FILE_UNKNOWN_ERROR /* any other error */ +} CONTROL_FILE_ERROR; +CONTROL_FILE_ERROR ma_control_file_create_or_open(); /* Write information durably to the control file. @@ -66,10 +75,10 @@ int ma_control_file_create_or_open(); #define CONTROL_FILE_UPDATE_ONLY_LSN 1 #define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, - uint objs_to_write); + uint objs_to_write); /* Free resources taken by control file subsystem */ -void ma_control_file_end(); +int ma_control_file_end(); #endif diff --git a/storage/maria/ma_control_file_test.c b/storage/maria/ma_control_file_test.c deleted file mode 100644 index b99c61da4fb..00000000000 --- a/storage/maria/ma_control_file_test.c +++ /dev/null @@ -1,312 +0,0 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* Unit test of the control file module of the Maria engine */ - -/* TODO: make it fit the mytap framework */ - -/* - Note that it is not possible to test the durability of the write (can't - pull the plug programmatically :) -*/ - -#include "maria.h" -#include "ma_control_file.h" -#include - -char file_name[FN_REFLEN]; -int fd= -1; - -static void clean_files(); -static void run_test_normal(); -static void run_test_abnormal(); -static void usage(); -static void get_options(int argc, char *argv[]); - -int main(int argc,char *argv[]) -{ - MY_INIT(argv[0]); - - get_options(argc,argv); - - clean_files(); - run_test_normal(); - run_test_abnormal(); - - fprintf(stderr, "All tests succeeded\n"); - exit(0); /* all ok, if some test failed, we will have aborted */ -} - -/* - Abort unless given expression is non-zero. - - SYNOPSIS - DIE_UNLESS(expr) - - DESCRIPTION - We can't use any kind of system assert as we need to - preserve tested invariants in release builds as well. - - NOTE - This is infamous copy-paste from mysql_client_test.c; - we should instead put it in some include in one single place. -*/ - -#define DIE_UNLESS(expr) \ - ((void) ((expr) ? 0 : (die(__FILE__, __LINE__, #expr), 0))) -#define DIE_IF(expr) \ - ((void) (!(expr) ? 0 : (die(__FILE__, __LINE__, #expr), 0))) -#define DIE(expr) \ - die(__FILE__, __LINE__, #expr) - -void die(const char *file, int line, const char *expr) -{ - fprintf(stderr, "%s:%d: check failed: '%s'\n", file, line, expr); - abort(); -} - - -static void clean_files() -{ - DIE_IF(fn_format(file_name, "control", maria_data_root, "", MYF(MY_WME)) == - NullS); - my_delete(file_name, MYF(0)); /* maybe file does not exist, ignore error */ -} - - -static void run_test_normal() -{ - LSN checkpoint_lsn; - uint32 logno; - uint objs_to_write; - uint i; - char buffer[17]; - - /* TEST0: Instance starts from scratch (control file does not exist) */ - DIE_UNLESS(ma_control_file_create_or_open() == 0); - /* Check that the module reports no information */ - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); - - /* TEST1: Simulate creation of one log */ - - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; - logno= 123; - DIE_UNLESS(ma_control_file_write_and_force(NULL, logno, - objs_to_write) == 0); - /* Check that last_logno was updated */ - DIE_UNLESS(last_logno == logno); - /* Simulate shutdown */ - ma_control_file_end(); - /* Verify amnesia */ - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); - /* And restart */ - DIE_UNLESS(ma_control_file_create_or_open() == 0); - DIE_UNLESS(last_logno == logno); - - /* TEST2: Simulate creation of 5 logs */ - - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; - logno= 100; - for (i= 0; i<5; i++) - { - logno*= 3; - DIE_UNLESS(ma_control_file_write_and_force(NULL, logno, - objs_to_write) == 0); - } - ma_control_file_end(); - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); - DIE_UNLESS(ma_control_file_create_or_open() == 0); - DIE_UNLESS(last_logno == logno); - - /* - TEST3: Simulate one checkpoint, one log creation, two checkpoints, one - log creation. - */ - - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - checkpoint_lsn= (LSN){5, 10000}; - logno= 10; - DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, - objs_to_write) == 0); - /* check that last_logno was not updated */ - DIE_UNLESS(last_logno != logno); - /* Check that last_checkpoint_lsn was updated */ - DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); - - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; - checkpoint_lsn= (LSN){5, 20000}; - logno= 17; - DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, - objs_to_write) == 0); - /* Check that checkpoint LSN was not updated */ - DIE_UNLESS(last_checkpoint_lsn.rec_offset != checkpoint_lsn.rec_offset); - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - checkpoint_lsn= (LSN){17, 20000}; - DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, - objs_to_write) == 0); - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - checkpoint_lsn= (LSN){17, 45000}; - DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, - objs_to_write) == 0); - objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; - logno= 19; - DIE_UNLESS(ma_control_file_write_and_force(&checkpoint_lsn, logno, - objs_to_write) == 0); - - ma_control_file_end(); - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); - DIE_UNLESS(ma_control_file_create_or_open() == 0); - DIE_UNLESS(last_logno == logno); - DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); - - /* - TEST4: actually check by ourselves the content of the file. - Note that constants (offsets) are hard-coded here, precisely to prevent - someone from changing them in the control file module and breaking - backward-compatibility. - TODO: when we reach the format-freeze state, we may even just do a - comparison with a raw binary string, to not depend on any uint4korr - future change/breakage. - */ - - DIE_IF((fd= my_open(file_name, - O_BINARY | O_RDWR, - MYF(MY_WME))) < 0); - DIE_IF(my_read(fd, buffer, 17, MYF(MY_FNABP | MY_WME)) != 0); - DIE_IF(my_close(fd, MYF(MY_WME)) != 0); - i= uint4korr(buffer+5); - DIE_UNLESS(i == last_checkpoint_lsn.file_no); - i= uint4korr(buffer+9); - DIE_UNLESS(i == last_checkpoint_lsn.rec_offset); - i= uint4korr(buffer+13); - DIE_UNLESS(i == last_logno); - - - /* TEST5: Simulate stop/start/nothing/stop/start */ - - ma_control_file_end(); - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(ma_control_file_create_or_open() == 0); - ma_control_file_end(); - DIE_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - DIE_UNLESS(ma_control_file_create_or_open() == 0); - DIE_UNLESS(last_logno == logno); - DIE_UNLESS(last_checkpoint_lsn.file_no == checkpoint_lsn.file_no); - DIE_UNLESS(last_checkpoint_lsn.rec_offset == checkpoint_lsn.rec_offset); - -} - -static void run_test_abnormal() -{ - char buffer[4]; - /* Corrupt the control file */ - DIE_IF((fd= my_open(file_name, - O_BINARY | O_RDWR, - MYF(MY_WME))) < 0); - DIE_IF(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) != 0); - DIE_IF(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) != 0); - DIE_IF(my_close(fd, MYF(MY_WME)) != 0); - - /* Check that control file module sees the problem */ - DIE_IF(ma_control_file_create_or_open() == 0); - - /* Restore it and corrupt it differently */ - DIE_IF((fd= my_open(file_name, - O_BINARY | O_RDWR, - MYF(MY_WME))) < 0); - /* Restore magic string */ - DIE_IF(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) != 0); - DIE_IF(my_pread(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) != 0); - buffer[1]= buffer[0]+3; /* mangle checksum */ - DIE_IF(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) != 0); - DIE_IF(my_close(fd, MYF(MY_WME)) != 0); - - /* Check that control file module sees the problem */ - DIE_IF(ma_control_file_create_or_open() == 0); - - /* Note that control file is left corrupted at this point */ -} - - -static struct my_option my_long_options[] = -{ -#ifndef DBUG_OFF - {"debug", '#', "Debug log.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#endif - {"help", '?', "Display help and exit", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"version", 'V', "Print version number and exit", - 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} -}; - - -static void version() -{ - printf("ma_control_file_test: unit test for the control file " - "module of the Maria storage engine. Ver 1.0 \n"); -} - -static my_bool -get_one_option(int optid, const struct my_option *opt __attribute__((unused)), - char *argument) -{ - switch(optid) { - case 'V': - version(); - exit(0); - case '#': - DBUG_PUSH (argument); - break; - case '?': - version(); - usage(); - exit(0); - } - return 0; -} - - -/* Read options */ - -static void get_options(int argc, char *argv[]) -{ - int ho_error; - - if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) - exit(ho_error); - - return; -} /* get options */ - - -static void usage() -{ - printf("Usage: %s [options]\n\n", my_progname); - my_print_help(my_long_options); - my_print_variables(my_long_options); -} diff --git a/unittest/Makefile.am b/unittest/Makefile.am index ca3291efde0..176ac020bea 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -1,10 +1,10 @@ -SUBDIRS = mytap . mysys examples +SUBDIRS = mytap . mysys storage examples noinst_SCRIPTS = unit EXTRA_DIST = unit.pl CLEANFILES = unit -unittests = mytap mysys +unittests = mytap mysys storage test: unit ./unit run $(unittests) diff --git a/unittest/storage/Makefile.am b/unittest/storage/Makefile.am new file mode 100644 index 00000000000..21de2de75fc --- /dev/null +++ b/unittest/storage/Makefile.am @@ -0,0 +1,27 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# Process this file with automake to create Makefile.in + +AUTOMAKE_OPTIONS = foreign + +# These are built from source in the Docs directory +EXTRA_DIST = +# Cannot use @mysql_se_dirs@ as not all engines have unit tests here +SUBDIRS = @mysql_se_unittest_dirs@ + +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/unittest/storage/maria/Makefile.am b/unittest/storage/maria/Makefile.am new file mode 100644 index 00000000000..eae2990aea9 --- /dev/null +++ b/unittest/storage/maria/Makefile.am @@ -0,0 +1,29 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include +AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap + +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ + $(top_builddir)/storage/maria/libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +noinst_PROGRAMS = ma_control_file-t +CLEANFILES = maria_control diff --git a/unittest/storage/maria/ma_control_file-t.c b/unittest/storage/maria/ma_control_file-t.c new file mode 100644 index 00000000000..3ea6932c754 --- /dev/null +++ b/unittest/storage/maria/ma_control_file-t.c @@ -0,0 +1,448 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Unit test of the control file module of the Maria engine WL#3234 */ + +/* + Note that it is not possible to test the durability of the write (can't + pull the plug programmatically :) +*/ + +#include +#include +#include + +#ifndef WITH_MARIA_STORAGE_ENGINE +/* + If Maria is not compiled in, normally we don't come to building this test. +*/ +#error "Maria engine is not compiled in, test cannot be built" +#endif + +#include "maria.h" +#include "../../../storage/maria/ma_control_file.h" +#include + +char file_name[FN_REFLEN]; + +/* The values we'll set and expect the control file module to return */ +LSN expect_checkpoint_lsn; +uint32 expect_logno; + +static int delete_file(); +/* + Those are test-specific wrappers around the module's API functions: after + calling the module's API functions they perform checks on the result. +*/ +static int close_file(); /* wraps ma_control_file_end */ +static int create_or_open_file(); /* wraps ma_control_file_open_or_create */ +static int write_file(); /* wraps ma_control_file_write_and_force */ + +/* Tests */ +static int test_one_log(); +static int test_five_logs(); +static int test_3_checkpoints_and_2_logs(); +static int test_binary_content(); +static int test_start_stop(); +static int test_2_open_and_2_close(); +static int test_bad_magic_string(); +static int test_bad_checksum(); +static int test_bad_size(); + +/* Utility */ +static int verify_module_values_match_expected(); +static int verify_module_values_are_impossible(); +static void usage(); +static void get_options(int argc, char *argv[]); + +/* + If "expr" is FALSE, this macro will make the function print a diagnostic + message and immediately return 1. + This is inspired from assert() but does not crash the binary (sometimes we + may want to see how other tests go even if one fails). + RET_ERR means "return error". +*/ + +#define RET_ERR_UNLESS(expr) \ + {if (!(expr)) {diag("line %d: failure: '%s'", __LINE__, #expr); return 1;}} + + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + + plan(9); + + diag("Unit tests for control file"); + + get_options(argc,argv); + + diag("Deleting control file at startup, if there is an old one"); + RET_ERR_UNLESS(0 == delete_file()); /* if fails, can't continue */ + + diag("Tests of normal conditions"); + ok(0 == test_one_log(), "test of creating one log"); + ok(0 == test_five_logs(), "test of creating five logs"); + ok(0 == test_3_checkpoints_and_2_logs(), + "test of creating three checkpoints and two logs"); + ok(0 == test_binary_content(), "test of the binary content of the file"); + ok(0 == test_start_stop(), "test of multiple starts and stops"); + diag("Tests of abnormal conditions"); + ok(0 == test_2_open_and_2_close(), + "test of two open and two close (strange call sequence)"); + ok(0 == test_bad_magic_string(), "test of bad magic string"); + ok(0 == test_bad_checksum(), "test of bad checksum"); + ok(0 == test_bad_size(), "test of too small/big file"); + + return exit_status(); +} + + +static int delete_file() +{ + RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) != NullS); + /* + Maybe file does not exist, ignore error. + The error will however be printed on stderr. + */ + my_delete(file_name, MYF(MY_WME)); + expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + + return 0; +} + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) match what we expect. +*/ +static int verify_module_values_match_expected() +{ + RET_ERR_UNLESS(last_logno == expect_logno); + RET_ERR_UNLESS(last_checkpoint_lsn.file_no == + expect_checkpoint_lsn.file_no); + RET_ERR_UNLESS(last_checkpoint_lsn.rec_offset == + expect_checkpoint_lsn.rec_offset); + return 0; +} + + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) are impossible (this is used when the file has been closed). +*/ +static int verify_module_values_are_impossible() +{ + RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_checkpoint_lsn.file_no == + CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_checkpoint_lsn.rec_offset == + CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + return 0; +} + + +static int close_file() +{ + /* Simulate shutdown */ + ma_control_file_end(); + /* Verify amnesia */ + RET_ERR_UNLESS(verify_module_values_are_impossible() == 0); + return 0; +} + +static int create_or_open_file() +{ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int write_file(const LSN *checkpoint_lsn, + uint32 logno, + uint objs_to_write) +{ + RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno, + objs_to_write) == 0); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int test_one_log() +{ + uint objs_to_write; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 123; + RET_ERR_UNLESS(write_file(NULL, expect_logno, + objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_five_logs() +{ + uint objs_to_write; + uint i; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 100; + for (i= 0; i<5; i++) + { + expect_logno*= 3; + RET_ERR_UNLESS(write_file(NULL, expect_logno, + objs_to_write) == 0); + } + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_3_checkpoints_and_2_logs() +{ + uint objs_to_write; + /* + Simulate one checkpoint, one log creation, two checkpoints, one + log creation. + */ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= (LSN){5, 10000}; + RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 17; + RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= (LSN){17, 20000}; + RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= (LSN){17, 45000}; + RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 19; + RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_binary_content() +{ + uint i; + int fd; + + /* + TEST4: actually check by ourselves the content of the file. + Note that constants (offsets) are hard-coded here, precisely to prevent + someone from changing them in the control file module and breaking + backward-compatibility. + TODO: when we reach the format-freeze state, we may even just do a + comparison with a raw binary string, to not depend on any uint4korr + future change/breakage. + */ + + char buffer[17]; + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_read(fd, buffer, 17, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + i= uint4korr(buffer+5); + RET_ERR_UNLESS(i == last_checkpoint_lsn.file_no); + i= uint4korr(buffer+9); + RET_ERR_UNLESS(i == last_checkpoint_lsn.rec_offset); + i= uint4korr(buffer+13); + RET_ERR_UNLESS(i == last_logno); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_start_stop() +{ + /* TEST5: Simulate start/nothing/stop/start/nothing/stop/start */ + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_2_open_and_2_close() +{ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + + +static int test_bad_magic_string() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt magic string */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_MAGIC_STRING); + /* Restore magic string */ + RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_bad_checksum() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt checksum */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + buffer[0]+= 3; /* mangle checksum */ + RET_ERR_UNLESS(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_CHECKSUM); + /* Restore checksum */ + buffer[0]-= 3; + RET_ERR_UNLESS(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + return 0; +} + + +static int test_bad_size() +{ + char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + int fd; + + /* A too short file */ + RET_ERR_UNLESS(delete_file() == 0); + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR | O_CREAT, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL); + RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_BIG); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + /* Leave a correct control file */ + RET_ERR_UNLESS(delete_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + return 0; +} + + +static struct my_option my_long_options[] = +{ +#ifndef DBUG_OFF + {"debug", '#', "Debug log.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static void version() +{ + printf("ma_control_file_test: unit test for the control file " + "module of the Maria storage engine. Ver 1.0 \n"); +} + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'V': + version(); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + version(); + usage(); + exit(0); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, + get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} From 15b9ce2201453ce7ecc346b053910023e7d51b83 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 12 Sep 2006 15:52:01 +0200 Subject: [PATCH 31/95] WL#3234 Maria control file Fixes after Brian's and Serg's comments: storage engine's unit tests are now in storage//unittest instead of unittest/storage/ BitKeeper/deleted/.del-Makefile.am~78ea2e42d44d121f: Delete: unittest/storage/maria/Makefile.am BitKeeper/deleted/.del-Makefile.am~80497dbf1a80bdf3: Delete: unittest/storage/Makefile.am storage/maria/unittest/ma_control_file-t.c: Rename: unittest/storage/maria/ma_control_file-t.c -> storage/maria/unittest/ma_control_file-t.c config/ac-macros/plugins.m4: we change from unittest/storage// to storage//unittest: if the engine is enabled has such a directory, build this directory, and add it to the list of unit tests to run. configure.in: this dir does not exist anymore storage/maria/Makefile.am: to build Maria unittests, libmaria must be built. unittest/Makefile.am: unittest/storage is removed. target "unitests" is defined at "configure" time based on enabled engines. storage/maria/unittest/Makefile.am: simple Makefile.am to build ma_control_file-t --- config/ac-macros/plugins.m4 | 8 +++--- configure.in | 1 - storage/maria/Makefile.am | 3 +++ .../maria/unittest}/Makefile.am | 0 .../maria/unittest}/ma_control_file-t.c | 0 unittest/Makefile.am | 4 +-- unittest/storage/Makefile.am | 27 ------------------- 7 files changed, 9 insertions(+), 34 deletions(-) rename {unittest/storage/maria => storage/maria/unittest}/Makefile.am (100%) rename {unittest/storage/maria => storage/maria/unittest}/ma_control_file-t.c (100%) delete mode 100644 unittest/storage/Makefile.am diff --git a/config/ac-macros/plugins.m4 b/config/ac-macros/plugins.m4 index 791405f8fbd..ba7b3fcb087 100644 --- a/config/ac-macros/plugins.m4 +++ b/config/ac-macros/plugins.m4 @@ -411,11 +411,11 @@ dnl Although this is "pretty", it breaks libmysqld build [AC_CONFIG_SUBDIRS($6)], [ AC_CONFIG_FILES($6/Makefile) - m4_syscmd(test -d "unittest/$6") + m4_syscmd(test -d "$6/unittest") ifelse(m4_sysval, 0, [ - mysql_use_plugin_unittest_dir="$6" - AC_CONFIG_FILES(unittest/$6/Makefile) + mysql_use_plugin_unittest_dir="$6/unittest" + AC_CONFIG_FILES($6/unittest/Makefile) ], []) ] ) @@ -424,7 +424,7 @@ dnl Although this is "pretty", it breaks libmysqld build [mysql_se_name="]m4_substr($6, 8)" mysql_se_dirs="$mysql_se_dirs $mysql_se_name" if test -n "$mysql_use_plugin_unittest_dir" ; then - mysql_se_unittest_dirs="$mysql_se_unitest_dirs $mysql_se_name" + mysql_se_unittest_dirs="$mysql_se_unitest_dirs ../$mysql_use_plugin_unittest_dir" fi ], m4_substr($6, 0, 7), [plugin/], diff --git a/configure.in b/configure.in index 61c2deb5b5e..f0cdcde1abb 100644 --- a/configure.in +++ b/configure.in @@ -2482,7 +2482,6 @@ AC_SUBST(MAKE_BINARY_DISTRIBUTION_OPTIONS) AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl unittest/mysys/Makefile unittest/examples/Makefile dnl - unittest/storage/Makefile dnl strings/Makefile regex/Makefile storage/Makefile dnl man/Makefile BUILD/Makefile vio/Makefile dnl libmysql/Makefile client/Makefile dnl diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 10204226742..100000fa6cd 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -28,6 +28,9 @@ LDADD = DEFS = @DEFS@ +# "." is needed first because tests in unittest need libmaria +SUBDIRS = . unittest + EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt pkgdata_DATA = ma_test_all ma_test_all.res pkglib_LIBRARIES = libmaria.a diff --git a/unittest/storage/maria/Makefile.am b/storage/maria/unittest/Makefile.am similarity index 100% rename from unittest/storage/maria/Makefile.am rename to storage/maria/unittest/Makefile.am diff --git a/unittest/storage/maria/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c similarity index 100% rename from unittest/storage/maria/ma_control_file-t.c rename to storage/maria/unittest/ma_control_file-t.c diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 176ac020bea..76004e67fae 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -1,10 +1,10 @@ -SUBDIRS = mytap . mysys storage examples +SUBDIRS = mytap . mysys examples noinst_SCRIPTS = unit EXTRA_DIST = unit.pl CLEANFILES = unit -unittests = mytap mysys storage +unittests = mytap mysys @mysql_se_unittest_dirs@ test: unit ./unit run $(unittests) diff --git a/unittest/storage/Makefile.am b/unittest/storage/Makefile.am deleted file mode 100644 index 21de2de75fc..00000000000 --- a/unittest/storage/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -# Process this file with automake to create Makefile.in - -AUTOMAKE_OPTIONS = foreign - -# These are built from source in the Docs directory -EXTRA_DIST = -# Cannot use @mysql_se_dirs@ as not all engines have unit tests here -SUBDIRS = @mysql_se_unittest_dirs@ - -# Don't update the files from bitkeeper -%::SCCS/s.% From cdf831cf94fe9aabde6ffb5b19557893416061d6 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 14 Sep 2006 19:06:51 +0200 Subject: [PATCH 32/95] WL#3071 Maria checkpoint: changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). mysys/mf_pagecache.c: comment storage/maria/ma_checkpoint.c: changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). storage/maria/ma_checkpoint.h: copyright storage/maria/ma_control_file.c: copyright storage/maria/ma_control_file.h: copyright storage/maria/ma_least_recently_dirtied.c: copyright storage/maria/ma_least_recently_dirtied.h: copyright storage/maria/ma_recovery.c: copyright storage/maria/ma_recovery.h: copyright storage/maria/unittest/Makefile.am: copyright --- mysys/mf_pagecache.c | 4 + storage/maria/ma_checkpoint.c | 124 ++++++++++++++++++---- storage/maria/ma_checkpoint.h | 16 +++ storage/maria/ma_control_file.c | 21 +++- storage/maria/ma_control_file.h | 16 +++ storage/maria/ma_least_recently_dirtied.c | 16 +++ storage/maria/ma_least_recently_dirtied.h | 16 +++ storage/maria/ma_recovery.c | 16 +++ storage/maria/ma_recovery.h | 16 +++ storage/maria/unittest/Makefile.am | 2 +- 10 files changed, 225 insertions(+), 22 deletions(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4693995f922..3a054077809 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -2937,6 +2937,10 @@ restart: } pagecache->blocks_changed--; pagecache->global_blocks_changed--; + /* + free_block() will change the status of the block so no need to change + it here. + */ } /* Cache is locked, so we can relese page before freeing it */ diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 22e7b93d2f4..83312ce37b8 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3071 Maria checkpoint First version written by Guilhem Bichot on 2006-04-27. @@ -110,7 +126,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) LSN candidate_max_rec_lsn_at_last_checkpoint; /* to avoid { lock + no-op + unlock } in the common (==indirect) case */ my_bool need_log_mutex; - + DBUG_ENTER("execute_checkpoint"); safemutex_assert_owner(log_mutex); @@ -120,7 +136,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) { /* much I/O work to do, release log mutex */ unlock(log_mutex); - + switch (level) { case FULL: @@ -167,6 +183,13 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) } +/* + Does an indirect checpoint (collects data from data structures, writes into + a checkpoint log record). + Returns the largest LSN of the LRD when the checkpoint happened (this is a + fuzzy definition), or LSN_IMPOSSIBLE on error. That LSN is used for the + "two-checkpoint rule" (MEDIUM checkpoints). +*/ LSN checkpoint_indirect(my_bool need_log_mutex) { DBUG_ENTER("checkpoint_indirect"); @@ -180,6 +203,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex) LSN checkpoint_lsn; LSN candidate_max_rec_lsn_at_last_checkpoint= 0; list_element *el; /* to scan lists */ + ulong stored_LRD_size= 0; DBUG_ASSERT(sizeof(byte *) <= 8); @@ -192,27 +216,70 @@ LSN checkpoint_indirect(my_bool need_log_mutex) DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); - lock(global_LRD_mutex); - string1.length= 8+8+(8+8)*LRD->count; + /* STEP 1: fetch information about dirty pages */ + + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* + This is an over-estimation, as in theory blocks_changed may contain + non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the + checkpoint record; the true number of page-LRD-info we'll store into the + record is stored_LRD_size. + */ + string1.length= 8+8+(8+8)*pagecache->blocks_changed; if (NULL == (string1.str= my_malloc(string1.length))) goto err; ptr= string1.str; int8store(ptr, checkpoint_start_lsn); - ptr+= 8; - int8store(ptr, LRD->count); - ptr+= 8; - if (LRD->count) + ptr+= 8+8; /* don't store stored_LRD_size now, wait */ + if (pagecache->blocks_changed > 0) { - candidate_max_rec_lsn_at_last_checkpoint= LRD->last->rec_lsn; - for (el= LRD->first; el; el= el->next) + /* + There are different ways to scan the dirty blocks; + flush_all_key_blocks() uses a loop over pagecache->used_last->next_used, + and for each element of the loop, loops into + pagecache->changed_blocks[FILE_HASH(file of the element)]. + This has the drawback that used_last includes non-dirty blocks, and it's + two loops over many elements. Here we try something simpler. + If there are no blocks in changed_blocks[file_hash], we should hit + zeroes and skip them. + */ + uint file_hash; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { - int8store(ptr, el->page_id); - ptr+= 8; - int8store(ptr, el->rec_lsn); - ptr+= 8; + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + { + /* no need to store it in the checkpoint record */ + continue; + } + /* Q: two "block"s cannot have the same "hash_link", right? */ + int8store(ptr, block->hash_link->pageno); + ptr+= 8; + /* I assume rec_lsn will be member of "block", not of "hash_link" */ + int8store(ptr, block->rec_lsn); + ptr+= 8; + stored_LRD_size++; + set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint, + block->rec_lsn); + } } - } - unlock(global_LRD_mutex); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed); + int8store(string1.str+8, stored_LRD_size); + string1.length= 8+8+(8+8)*stored_LRD_size; + + /* STEP 2: fetch information about transactions */ /* If trx are in more than one list (e.g. three: @@ -253,19 +320,28 @@ LSN checkpoint_indirect(my_bool need_log_mutex) } unlock(global_transactions_list_mutex); + /* STEP 3: fetch information about table files */ + + /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ lock(global_share_list_mutex); string3.length= 8+(8+8)*share_list->count; if (NULL == (string3.str= my_malloc(string3.length))) goto err; ptr= string3.str; - /* possibly latch each MARIA_SHARE */ + /* possibly latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); + /* + We'll copy the file id (a bit like share->kfile), the file name + (like share->unique_file_name[_length]). + */ make_copy_of_global_share_list_to_array; + pthread_mutex_unlock(&share->intern_lock); unlock(global_share_list_mutex); /* work on copy */ int8store(ptr, elements_in_array); ptr+= 8; - for (scan_array) + for (el in array) { int8store(ptr, array[...].file_id); ptr+= 8; @@ -273,9 +349,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex) ptr+= ...; /* these two are long ops (involving disk I/O) that's why we copied the - list: + list, to not keep the list locked for long: */ flush_bitmap_pages(el); + /* TODO: and also autoinc counter, logical file end, free page list */ + /* fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per second, so if you have touched 1000 files it's 7 seconds). @@ -283,7 +361,8 @@ LSN checkpoint_indirect(my_bool need_log_mutex) force_file(el); } - /* now write the record */ + /* LAST STEP: now write the checkpoint log record */ + string_array[0]= string1; string_array[1]= string2; string_array[2]= string3; @@ -292,6 +371,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex) checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, &system_trans, string_array); + /* + Do nothing between the log write and the control file write, for the + "repair control file" tool to be possible one day. + */ + if (LSN_IMPOSSIBLE == checkpoint_lsn) goto err; diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index a9de18c695f..1b8064fa755 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3071 Maria checkpoint First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5fbb0a084df..5b66577938f 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3234 Maria control file First version written by Guilhem Bichot on 2006-04-27. @@ -137,7 +153,10 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() We could have a tool which can rebuild the control file, by reading the directory of logs, finding the newest log, reading it to find last - checkpoint... Slow but can save your db. + checkpoint... Slow but can save your db. For this to be possible, we + must always write to the control file right after writing the checkpoint + log record, and do nothing in between (i.e. the checkpoint must be + usable as soon as it has been written to the log). */ LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 5ac6f158183..9a99a721469 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3234 Maria control file First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c index c6285fe47cd..b0b7fb1ef10 100644 --- a/storage/maria/ma_least_recently_dirtied.c +++ b/storage/maria/ma_least_recently_dirtied.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3261 Maria - background flushing of the least-recently-dirtied pages First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h index 6a30db4b5f0..f6d7420febc 100644 --- a/storage/maria/ma_least_recently_dirtied.h +++ b/storage/maria/ma_least_recently_dirtied.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3261 Maria - background flushing of the least-recently-dirtied pages First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index babf7507ef1..b6739b86874 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3072 Maria recovery First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index b85ffdeef59..05026f4b52a 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3072 Maria recovery First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index eae2990aea9..8a5ca3d669f 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by From 09a7f30973304620e00b934597b72f7dd7eeb966 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 15 Sep 2006 11:05:35 +0200 Subject: [PATCH 33/95] WL#3234 Maria Control file manager last round of fixes to the storage engines' and plugins' unit tests structure. Will extract a total patch and push it in 5.1 as has been approved. Makefile.am: unittest must be before storage and plugin, because engine and plugin may have unit tests which link with libtap which is found in unitttest. config/ac-macros/plugins.m4: When enabling an engine/plugin, add its directory to the list of directories where unit tests should be searched. That is, its directory will be recursively searched by our unit test framework which will execute any executable *-t file. storage/maria/ma_control_file.c: those my_message pollute the output of unit tests. storage/maria/plug.in: When Maria is enabled, add its unittest Makefile. unittest/Makefile.am: plugins too --- Makefile.am | 4 ++-- config/ac-macros/plugins.m4 | 24 ++++++++---------------- storage/maria/ma_control_file.c | 11 +++++++---- storage/maria/plug.in | 2 +- unittest/Makefile.am | 2 +- 5 files changed, 19 insertions(+), 24 deletions(-) diff --git a/Makefile.am b/Makefile.am index 4adc06acad7..4f0faa59845 100644 --- a/Makefile.am +++ b/Makefile.am @@ -24,11 +24,11 @@ EXTRA_DIST = INSTALL-SOURCE INSTALL-WIN-SOURCE \ SUBDIRS = . include @docs_dirs@ @zlib_dir@ \ @readline_topdir@ sql-common \ @thread_dirs@ pstack \ - @sql_union_dirs@ storage plugin \ + @sql_union_dirs@ unittest storage plugin \ @sql_server@ scripts @man_dirs@ tests \ netware @libmysqld_dirs@ \ mysql-test support-files @tools_dirs@ \ - unittest win + win DIST_SUBDIRS = $(SUBDIRS) BUILD diff --git a/config/ac-macros/plugins.m4 b/config/ac-macros/plugins.m4 index ba7b3fcb087..87f057e696a 100644 --- a/config/ac-macros/plugins.m4 +++ b/config/ac-macros/plugins.m4 @@ -281,6 +281,7 @@ AC_DEFUN([MYSQL_CONFIGURE_PLUGINS],[ AC_SUBST([mysql_se_dirs]) AC_SUBST([mysql_pg_dirs]) AC_SUBST([mysql_se_unittest_dirs]) + AC_SUBST([mysql_pg_unittest_dirs]) ]) ]) ]) @@ -316,7 +317,6 @@ AC_DEFUN([__MYSQL_EMIT_CHECK_PLUGIN],[ ]) AC_MSG_CHECKING([whether to use ]$3) mysql_use_plugin_dir="" - mysql_use_plugin_unittest_dir="" m4_ifdef([$10],[ if test "X[$mysql_plugin_]$2" = Xyes -a \ "X[$with_plugin_]$2" != Xno -o \ @@ -409,26 +409,18 @@ dnl Although this is "pretty", it breaks libmysqld build m4_syscmd(test -f "$6/configure") ifelse(m4_sysval, 0, [AC_CONFIG_SUBDIRS($6)], - [ - AC_CONFIG_FILES($6/Makefile) - m4_syscmd(test -d "$6/unittest") - ifelse(m4_sysval, 0, - [ - mysql_use_plugin_unittest_dir="$6/unittest" - AC_CONFIG_FILES($6/unittest/Makefile) - ], []) - ] + [AC_CONFIG_FILES($6/Makefile)] ) ifelse(m4_substr($6, 0, 8), [storage/], [ - [mysql_se_name="]m4_substr($6, 8)" - mysql_se_dirs="$mysql_se_dirs $mysql_se_name" - if test -n "$mysql_use_plugin_unittest_dir" ; then - mysql_se_unittest_dirs="$mysql_se_unitest_dirs ../$mysql_use_plugin_unittest_dir" - fi + [mysql_se_dirs="$mysql_se_dirs ]m4_substr($6, 8)" + mysql_se_unittest_dirs="$mysql_se_unittest_dirs ../$6" ], m4_substr($6, 0, 7), [plugin/], - [mysql_pg_dirs="$mysql_pg_dirs ]m4_substr($6, 7)", + [ + [mysql_pg_dirs="$mysql_pg_dirs ]m4_substr($6, 7)" + mysql_pg_unittest_dirs="$mysql_pg_unittest_dirs ../$6" + ], [AC_FATAL([don't know how to handle plugin dir ]$6)]) fi ]) diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5b66577938f..5090fac4182 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -188,14 +188,17 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() disk/filesystem has a problem. So let's be rigid. */ - my_message(0, "too small file", MYF(0)); /* TODO: improve errors */ + /* + TODO: store a message "too small file" somewhere, so that it goes to + MySQL's error log at startup. + */ error= CONTROL_FILE_TOO_SMALL; goto err; } if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE) { - my_message(0, "too big file", MYF(0)); /* TODO: improve errors */ + /* TODO: store "too big file" message */ error= CONTROL_FILE_TOO_BIG; goto err; } @@ -206,7 +209,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) { - my_message(0, "bad magic string", MYF(0)); + /* TODO: store message "bad magic string" somewhere */ error= CONTROL_FILE_BAD_MAGIC_STRING; goto err; } @@ -214,7 +217,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) != buffer[CONTROL_FILE_CHECKSUM_OFFSET]) { - my_message(0, "checksum mismatch", MYF(0)); + /* TODO: store message "checksum mismatch" somewhere */ error= CONTROL_FILE_BAD_CHECKSUM; goto err; } diff --git a/storage/maria/plug.in b/storage/maria/plug.in index de74293bd96..a9b35aefbfb 100644 --- a/storage/maria/plug.in +++ b/storage/maria/plug.in @@ -1,7 +1,7 @@ MYSQL_STORAGE_ENGINE(maria, no, [Maria Storage Engine], [Traditional transactional MySQL tables], [max,max-no-ndb]) MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)]) MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) # Maria will probably go first into max builds, not all builds, # so we don't declare it mandatory. - diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 76004e67fae..7323ef5fb77 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -4,7 +4,7 @@ noinst_SCRIPTS = unit EXTRA_DIST = unit.pl CLEANFILES = unit -unittests = mytap mysys @mysql_se_unittest_dirs@ +unittests = mytap mysys @mysql_se_unittest_dirs@ @mysql_pg_unittest_dirs@ test: unit ./unit run $(unittests) From b860539e37bb585794e144d5785906af13e0e1b9 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Sep 2006 10:42:15 +0200 Subject: [PATCH 34/95] Maria, very minor changes. mysys/Makefile.am: test_pagecache* programs need to link with dbug (I used BUILD/compile-pentium64-valgrind-max) mysys/mf_pagecache.c: unused constant --- mysys/Makefile.am | 3 ++- mysys/mf_pagecache.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 6317d6c9285..edafd128eb0 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -20,7 +20,8 @@ MYSQLBASEdir= $(prefix) INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ -I$(top_srcdir)/include -I$(srcdir) pkglib_LIBRARIES = libmysys.a -LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a +LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a \ + $(top_builddir)/dbug/libdbug.a noinst_HEADERS = mysys_priv.h my_static.h libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3a054077809..9c6f586be8a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -154,7 +154,6 @@ struct st_pagecache_hash_link #define BLOCK_IN_FLUSH 16 /* block is in flush operation */ #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define BLOCK_WRLOCK 64 /* write locked block */ -#define BLOCK_CPYWRT 128 /* block buffer is in copy&write (see also cpyrd) */ /* page status, returned by find_key_block */ #define PAGE_READ 0 From 8e04cdb2dd1b5102e578c9c98b220a07857bfcbb Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 21 Sep 2006 23:12:56 +0200 Subject: [PATCH 35/95] Maria: fixes for build failures in pushbuild. Comments, fixing of function names. mysys/mf_pagecache.c: comments fixing. More comments. pagecache_ulock_block->page_unlock_block sql/mysqld.cc: MyISAM is always enabled so Maria needs have_maria which MyISAM does not need. This should fix a link failure in pushbuild storage/Makefile.am: force myisam to be built before maria (will not be needed when Maria does not depend on MyISAM anymore) --- mysys/mf_pagecache.c | 25 +++++++++++++++---------- sql/mysqld.cc | 2 ++ storage/Makefile.am | 7 ++++++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 9c6f586be8a..4fa814d8188 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -523,13 +523,13 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); /* - Read page from the disk + Write page to the disk SYNOPSIS pagecache_fwrite() pagecache - page cache pointer filedesc - pagecache file descriptor structure - buffer - buffer in which we will read + buffer - buffer which we will write type - page type (plain or with LSN) flags - MYF() flags @@ -571,6 +571,10 @@ uint pagecache_fwrite(PAGECACHE *pagecache, (pageno)<<(pagecache->shift), flags) +/* + next_power(value) is 2 at the power of (1+floor(log2(value))); + e.g. next_power(2)=4, next_power(3)=4. +*/ static uint next_power(uint value) { uint old_value= 1; @@ -2167,9 +2171,9 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, DBUG_RETURN(0); } -void pagecache_ulock_block(PAGECACHE_BLOCK_LINK *block) +void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_ulock_block"); + DBUG_ENTER("pagecache_unlock_block"); BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); block->status&= ~BLOCK_WRLOCK; @@ -2234,7 +2238,7 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, Removes writelog and puts read lock (which is nothing in our implementation) */ - pagecache_ulock_block(block); + pagecache_unlock_block(block); case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ #ifndef DBUG_OFF @@ -2247,8 +2251,9 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, if (lock == PAGECACHE_LOCK_WRITE_TO_READ) { pagecache_change_lock(block, 0); - } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || - lock == PAGECACHE_LOCK_READ_UNLOCK) + } + else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK) { pagecache_remove_lock(block); } @@ -2422,8 +2427,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* - As soon as we keep lock cache can be used, and we have lock bacause want - aunlock. + As soon as we keep lock cache can be used, and we have lock because want + to unlock. */ DBUG_ASSERT(pagecache->can_be_used); @@ -2891,7 +2896,7 @@ restart: link= get_present_hash_link(pagecache, file, pageno, &unused_start); if (!link) { - DBUG_PRINT("info", ("There is no fuch page in the cache")); + DBUG_PRINT("info", ("There is no such page in the cache")); DBUG_RETURN(0); } block= link->block; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index f1d550d0ff1..684b327df6d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -8108,6 +8108,7 @@ void refresh_status(THD *thd) #undef have_federated_db #undef have_partition_db #undef have_blackhole_db +#undef have_maria SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; @@ -8117,6 +8118,7 @@ SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_maria= SHOW_OPTION_NO; #ifndef WITH_INNOBASE_STORAGE_ENGINE uint innobase_flush_log_at_trx_commit; diff --git a/storage/Makefile.am b/storage/Makefile.am index 900e486c6ac..e5d7ac2778f 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -20,7 +20,12 @@ AUTOMAKE_OPTIONS = foreign # These are built from source in the Docs directory EXTRA_DIST = -SUBDIRS = @mysql_se_dirs@ +# Until we remove fulltext-related references from Maria to MyISAM +# MyISAM must be built before Maria, which is not the case by default +# because of alphabetical order +# So we put myisam first; this is very ugly regarding plugins' logic +# but it works, and we'll remove it soon. +SUBDIRS = myisam @mysql_se_dirs@ # Don't update the files from bitkeeper %::SCCS/s.% From 0a1dc8af5b665647a08c51f75b5d5c2e1a112d99 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 11 Oct 2006 19:30:16 +0300 Subject: [PATCH 36/95] Merge of recent MyISAM changes into Maria. Testsuite passes as much as in the main 5.1 (partition and ndb_alter_table fail). mysql-test/r/maria.result: merge from MyISAM mysql-test/r/ps_maria.result: merge from MyISAM mysql-test/t/maria.test: merge from MyISAM sql/mysql_priv.h: fix after wrong merge sql/mysqld.cc: fix after wrong merge sql/set_var.cc: adding _db like other engines have storage/maria/Makefile.am: merge from MyISAM storage/maria/ha_maria.cc: merge from MyISAM storage/maria/ha_maria.h: merge from MyISAM storage/maria/ma_check.c: merge from MyISAM storage/maria/ma_delete.c: merge from MyISAM storage/maria/ma_init.c: maria_inited should rather be my_bool storage/maria/ma_locking.c: merge from MyISAM storage/maria/ma_packrec.c: merge from MyISAM storage/maria/ma_panic.c: maria_panic() should not take mutex if engine has not been inited. storage/maria/ma_rkey.c: merge from MyISAM storage/maria/ma_write.c: merge from MyISAM storage/maria/maria_def.h: merge from MyISAM. maria_inited is needed for maria_panic(). storage/maria/maria_ftdump.c: merge from MyISAM --- mysql-test/r/maria.result | 28 +++ mysql-test/r/ps_maria.result | 320 +++++++++++++++++------------------ mysql-test/t/maria.test | 36 ++++ sql/mysql_priv.h | 3 + sql/mysqld.cc | 5 + sql/set_var.cc | 4 +- storage/maria/Makefile.am | 2 +- storage/maria/ha_maria.cc | 56 ++++-- storage/maria/ha_maria.h | 6 +- storage/maria/ma_check.c | 15 +- storage/maria/ma_delete.c | 2 +- storage/maria/ma_init.c | 6 +- storage/maria/ma_locking.c | 15 ++ storage/maria/ma_packrec.c | 18 +- storage/maria/ma_panic.c | 2 + storage/maria/ma_rkey.c | 55 +++--- storage/maria/ma_write.c | 8 +- storage/maria/maria_def.h | 8 + storage/maria/maria_ftdump.c | 3 +- 19 files changed, 369 insertions(+), 223 deletions(-) diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index b6924e4395b..396b5da4bc1 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -517,6 +517,34 @@ select c1 from t1 order by c1 limit 1; c1 a drop table t1; +create table t1 (a int not null, primary key(a)); +create table t2 (a int not null, b int not null, primary key(a,b)); +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +insert into t2 values(2,0); +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +drop table t1,t2; +CREATE TABLE t1 (c1 varchar(250) NOT NULL); +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)); +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +DROP TABLE t1,t2; CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); Got one of the listed errors create table t1 (a int, b varchar(200), c text not null) checksum=1; diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index f47c72187e2..9268c44eecd 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1929,26 +1929,26 @@ def @arg09 253 23 1 Y 128 31 63 def @arg10 253 23 1 Y 128 31 63 def @arg11 253 67 6 Y 128 30 63 def @arg12 253 67 6 Y 128 30 63 -def @arg13 253 8192 10 Y 128 31 63 -def @arg14 253 8192 19 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 8 Y 128 31 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 def @arg17 253 20 4 Y 128 0 63 def @arg18 253 20 1 Y 128 0 63 def @arg19 253 20 1 Y 128 0 63 -def @arg20 253 8192 1 Y 0 31 8 -def @arg21 253 8192 10 Y 0 31 8 -def @arg22 253 8192 30 Y 0 31 8 -def @arg23 253 8192 8 Y 128 31 63 -def @arg24 253 8192 8 Y 0 31 8 -def @arg25 253 8192 4 Y 128 31 63 -def @arg26 253 8192 4 Y 0 31 8 -def @arg27 253 8192 10 Y 128 31 63 -def @arg28 253 8192 10 Y 0 31 8 -def @arg29 253 8192 8 Y 128 31 63 -def @arg30 253 8192 8 Y 0 31 8 -def @arg31 253 8192 3 Y 0 31 8 -def @arg32 253 8192 6 Y 0 31 8 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, @@ -1976,26 +1976,26 @@ def @arg09 253 23 0 Y 128 31 63 def @arg10 253 23 0 Y 128 31 63 def @arg11 253 67 0 Y 128 30 63 def @arg12 253 67 0 Y 128 30 63 -def @arg13 253 8192 0 Y 128 31 63 -def @arg14 253 8192 0 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 0 Y 128 31 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 def @arg17 253 20 0 Y 128 0 63 def @arg18 253 20 0 Y 128 0 63 def @arg19 253 20 0 Y 128 0 63 -def @arg20 253 8192 0 Y 0 31 8 -def @arg21 253 8192 0 Y 0 31 8 -def @arg22 253 8192 0 Y 0 31 8 -def @arg23 253 8192 0 Y 128 31 63 -def @arg24 253 8192 0 Y 0 31 8 -def @arg25 253 8192 0 Y 128 31 63 -def @arg26 253 8192 0 Y 0 31 8 -def @arg27 253 8192 0 Y 128 31 63 -def @arg28 253 8192 0 Y 0 31 8 -def @arg29 253 8192 0 Y 128 31 63 -def @arg30 253 8192 0 Y 0 31 8 -def @arg31 253 8192 0 Y 0 31 8 -def @arg32 253 8192 0 Y 0 31 8 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL prepare stmt1 from "select @@ -2026,26 +2026,26 @@ def @arg09 253 23 1 Y 128 31 63 def @arg10 253 23 1 Y 128 31 63 def @arg11 253 67 6 Y 128 30 63 def @arg12 253 67 6 Y 128 30 63 -def @arg13 253 8192 10 Y 128 31 63 -def @arg14 253 8192 19 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 8 Y 128 31 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 def @arg17 253 20 4 Y 128 0 63 def @arg18 253 20 1 Y 128 0 63 def @arg19 253 20 1 Y 128 0 63 -def @arg20 253 8192 1 Y 0 31 8 -def @arg21 253 8192 10 Y 0 31 8 -def @arg22 253 8192 30 Y 0 31 8 -def @arg23 253 8192 8 Y 128 31 63 -def @arg24 253 8192 8 Y 0 31 8 -def @arg25 253 8192 4 Y 128 31 63 -def @arg26 253 8192 4 Y 0 31 8 -def @arg27 253 8192 10 Y 128 31 63 -def @arg28 253 8192 10 Y 0 31 8 -def @arg29 253 8192 8 Y 128 31 63 -def @arg30 253 8192 8 Y 0 31 8 -def @arg31 253 8192 3 Y 0 31 8 -def @arg32 253 8192 6 Y 0 31 8 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday set @my_key= 0 ; @@ -2066,26 +2066,26 @@ def @arg09 253 23 0 Y 128 31 63 def @arg10 253 23 0 Y 128 31 63 def @arg11 253 67 0 Y 128 30 63 def @arg12 253 67 0 Y 128 30 63 -def @arg13 253 8192 0 Y 128 31 63 -def @arg14 253 8192 0 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 0 Y 128 31 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 def @arg17 253 20 0 Y 128 0 63 def @arg18 253 20 0 Y 128 0 63 def @arg19 253 20 0 Y 128 0 63 -def @arg20 253 8192 0 Y 0 31 8 -def @arg21 253 8192 0 Y 0 31 8 -def @arg22 253 8192 0 Y 0 31 8 -def @arg23 253 8192 0 Y 128 31 63 -def @arg24 253 8192 0 Y 0 31 8 -def @arg25 253 8192 0 Y 128 31 63 -def @arg26 253 8192 0 Y 0 31 8 -def @arg27 253 8192 0 Y 128 31 63 -def @arg28 253 8192 0 Y 0 31 8 -def @arg29 253 8192 0 Y 128 31 63 -def @arg30 253 8192 0 Y 0 31 8 -def @arg31 253 8192 0 Y 0 31 8 -def @arg32 253 8192 0 Y 0 31 8 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL prepare stmt1 from "select ? := c1 from t9 where c1= 1" ; @@ -2114,26 +2114,26 @@ def @arg09 253 23 1 Y 128 31 63 def @arg10 253 23 1 Y 128 31 63 def @arg11 253 67 6 Y 128 30 63 def @arg12 253 67 6 Y 128 30 63 -def @arg13 253 8192 10 Y 128 31 63 -def @arg14 253 8192 19 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 8 Y 128 31 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 def @arg17 253 20 4 Y 128 0 63 def @arg18 253 20 1 Y 128 0 63 def @arg19 253 20 1 Y 128 0 63 -def @arg20 253 8192 1 Y 0 31 8 -def @arg21 253 8192 10 Y 0 31 8 -def @arg22 253 8192 30 Y 0 31 8 -def @arg23 253 8192 8 Y 128 31 63 -def @arg24 253 8192 8 Y 0 31 8 -def @arg25 253 8192 4 Y 128 31 63 -def @arg26 253 8192 4 Y 0 31 8 -def @arg27 253 8192 10 Y 128 31 63 -def @arg28 253 8192 10 Y 0 31 8 -def @arg29 253 8192 8 Y 128 31 63 -def @arg30 253 8192 8 Y 0 31 8 -def @arg31 253 8192 3 Y 0 31 8 -def @arg32 253 8192 6 Y 0 31 8 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, @@ -2158,26 +2158,26 @@ def @arg09 253 23 0 Y 128 31 63 def @arg10 253 23 0 Y 128 31 63 def @arg11 253 67 0 Y 128 30 63 def @arg12 253 67 0 Y 128 30 63 -def @arg13 253 8192 0 Y 128 31 63 -def @arg14 253 8192 0 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 0 Y 128 31 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 def @arg17 253 20 0 Y 128 0 63 def @arg18 253 20 0 Y 128 0 63 def @arg19 253 20 0 Y 128 0 63 -def @arg20 253 8192 0 Y 0 31 8 -def @arg21 253 8192 0 Y 0 31 8 -def @arg22 253 8192 0 Y 0 31 8 -def @arg23 253 8192 0 Y 128 31 63 -def @arg24 253 8192 0 Y 0 31 8 -def @arg25 253 8192 0 Y 128 31 63 -def @arg26 253 8192 0 Y 0 31 8 -def @arg27 253 8192 0 Y 128 31 63 -def @arg28 253 8192 0 Y 0 31 8 -def @arg29 253 8192 0 Y 128 31 63 -def @arg30 253 8192 0 Y 0 31 8 -def @arg31 253 8192 0 Y 0 31 8 -def @arg32 253 8192 0 Y 0 31 8 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, @@ -2204,26 +2204,26 @@ def @arg09 253 23 1 Y 128 31 63 def @arg10 253 23 1 Y 128 31 63 def @arg11 253 67 6 Y 128 30 63 def @arg12 253 67 6 Y 128 30 63 -def @arg13 253 8192 10 Y 128 31 63 -def @arg14 253 8192 19 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 8 Y 128 31 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 def @arg17 253 20 4 Y 128 0 63 def @arg18 253 20 1 Y 128 0 63 def @arg19 253 20 1 Y 128 0 63 -def @arg20 253 8192 1 Y 0 31 8 -def @arg21 253 8192 10 Y 0 31 8 -def @arg22 253 8192 30 Y 0 31 8 -def @arg23 253 8192 8 Y 128 31 63 -def @arg24 253 8192 8 Y 0 31 8 -def @arg25 253 8192 4 Y 128 31 63 -def @arg26 253 8192 4 Y 0 31 8 -def @arg27 253 8192 10 Y 128 31 63 -def @arg28 253 8192 10 Y 0 31 8 -def @arg29 253 8192 8 Y 128 31 63 -def @arg30 253 8192 8 Y 0 31 8 -def @arg31 253 8192 3 Y 0 31 8 -def @arg32 253 8192 6 Y 0 31 8 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday set @my_key= 0 ; @@ -2242,26 +2242,26 @@ def @arg09 253 23 0 Y 128 31 63 def @arg10 253 23 0 Y 128 31 63 def @arg11 253 67 0 Y 128 30 63 def @arg12 253 67 0 Y 128 30 63 -def @arg13 253 8192 0 Y 128 31 63 -def @arg14 253 8192 0 Y 128 31 63 -def @arg15 253 8192 19 Y 128 31 63 -def @arg16 253 8192 0 Y 128 31 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 def @arg17 253 20 0 Y 128 0 63 def @arg18 253 20 0 Y 128 0 63 def @arg19 253 20 0 Y 128 0 63 -def @arg20 253 8192 0 Y 0 31 8 -def @arg21 253 8192 0 Y 0 31 8 -def @arg22 253 8192 0 Y 0 31 8 -def @arg23 253 8192 0 Y 128 31 63 -def @arg24 253 8192 0 Y 0 31 8 -def @arg25 253 8192 0 Y 128 31 63 -def @arg26 253 8192 0 Y 0 31 8 -def @arg27 253 8192 0 Y 128 31 63 -def @arg28 253 8192 0 Y 0 31 8 -def @arg29 253 8192 0 Y 128 31 63 -def @arg30 253 8192 0 Y 0 31 8 -def @arg31 253 8192 0 Y 0 31 8 -def @arg32 253 8192 0 Y 0 31 8 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 @arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL prepare stmt1 from "select c1 into ? from t9 where c1= 1" ; @@ -2689,21 +2689,21 @@ set @arg00= '1.11111111111111111111e+50' ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1265 Data truncated for column 'c1' at row 1 -Warning 1265 Data truncated for column 'c2' at row 1 -Warning 1265 Data truncated for column 'c3' at row 1 -Warning 1265 Data truncated for column 'c4' at row 1 -Warning 1265 Data truncated for column 'c5' at row 1 -Warning 1265 Data truncated for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 Warning 1264 Out of range value for column 'c7' at row 1 Warning 1264 Out of range value for column 'c12' at row 1 execute my_select ; -c1 1 -c2 1 -c3 1 -c4 1 -c5 1 -c6 1 +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 c7 3.40282e+38 c8 1.11111111111111e+50 c9 1.11111111111111e+50 @@ -2739,21 +2739,21 @@ set @arg00= '-1.11111111111111111111e+50' ; execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00 ; Warnings: -Warning 1265 Data truncated for column 'c1' at row 1 -Warning 1265 Data truncated for column 'c2' at row 1 -Warning 1265 Data truncated for column 'c3' at row 1 -Warning 1265 Data truncated for column 'c4' at row 1 -Warning 1265 Data truncated for column 'c5' at row 1 -Warning 1265 Data truncated for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 Warning 1264 Out of range value for column 'c7' at row 1 Warning 1264 Out of range value for column 'c12' at row 1 execute my_select ; -c1 -1 -c2 -1 -c3 -1 -c4 -1 -c5 -1 -c6 -1 +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 c7 -3.40282e+38 c8 -1.11111111111111e+50 c9 -1.11111111111111e+50 diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 1a44775277b..fa9983690cb 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -493,6 +493,42 @@ insert into t1 values ('a'), ('b'); select c1 from t1 order by c1 limit 1; drop table t1; +# +# Bug #14400 Join could miss concurrently inserted row +# +# Partial key. +create table t1 (a int not null, primary key(a)); +create table t2 (a int not null, b int not null, primary key(a,b)); +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +connect (root,localhost,root,,test,$MASTER_MYPORT,master.sock); +insert into t2 values(2,0); +disconnect root; +connection default; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +drop table t1,t2; +# +# Full key. +CREATE TABLE t1 (c1 varchar(250) NOT NULL); +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)); +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +connect (con1,localhost,root,,); +connection con1; +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +disconnect con1; +connection default; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +DROP TABLE t1,t2; + +# End of 4.0 tests + # # Test RTREE index # diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 9ac880333f6..4545ce99159 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1651,6 +1651,9 @@ extern SHOW_COMP_OPTION have_ndbcluster; extern SHOW_COMP_OPTION have_partition_db; extern SHOW_COMP_OPTION have_merge_db; extern SHOW_COMP_OPTION have_maria_db; +extern handlerton *partition_hton; +extern handlerton *myisam_hton; +extern handlerton *heap_hton; extern SHOW_COMP_OPTION have_row_based_replication; extern SHOW_COMP_OPTION have_openssl, have_symlink, have_dlopen; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 61d0ddbee2f..9dfc725773a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -7095,6 +7095,11 @@ static void mysql_init_variables(void) #else have_csv_db= SHOW_OPTION_NO; #endif +#ifdef WITH_MARIA_STORAGE_ENGINE + have_maria_db= SHOW_OPTION_YES; +#else + have_maria_db= SHOW_OPTION_NO; +#endif #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE have_ndbcluster= SHOW_OPTION_DISABLED; #else diff --git a/sql/set_var.cc b/sql/set_var.cc index 44223ca16f9..c41f3afae60 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -686,7 +686,7 @@ sys_var_have_variable sys_have_federated_db("have_federated_engine", &have_federated_db); sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); -sys_var_have_variable sys_have_maria("have_maria", &have_maria); +sys_var_have_variable sys_have_maria_db("have_maria", &have_maria_db); sys_var_have_variable sys_have_merge_db("have_merge", &have_merge_db); sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); @@ -818,7 +818,7 @@ SHOW_VAR init_vars[]= { {sys_have_federated_db.name,(char*) &have_federated_db, SHOW_HAVE}, {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, - {sys_have_maria.name, (char*) &have_maria, SHOW_HAVE}, + {sys_have_maria_db.name, (char*) &have_maria_db, SHOW_HAVE}, {sys_have_merge_db.name, (char*) &have_merge_db, SHOW_HAVE}, {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 100000fa6cd..eb8cdfa3aba 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -31,7 +31,7 @@ DEFS = @DEFS@ # "." is needed first because tests in unittest need libmaria SUBDIRS = . unittest -EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt +EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in pkgdata_DATA = ma_test_all ma_test_all.res pkglib_LIBRARIES = libmaria.a bin_PROGRAMS = maria_chk maria_pack maria_ftdump diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index a0084ef5e9c..4cdf0c4c086 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -58,9 +58,11 @@ TYPELIB maria_stats_method_typelib= ** MARIA tables *****************************************************************************/ -static handler *maria_create_handler(TABLE_SHARE * table, MEM_ROOT *mem_root) +static handler *maria_create_handler(handlerton *hton, + TABLE_SHARE * table, + MEM_ROOT *mem_root) { - return new (mem_root) ha_maria(table); + return new (mem_root) ha_maria(hton, table); } @@ -147,8 +149,8 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) } -ha_maria::ha_maria(TABLE_SHARE *table_arg): -handler(&maria_hton, table_arg), file(0), +ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg): +handler(hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS | @@ -158,6 +160,15 @@ can_enable_indexes(1) {} +handler *ha_maria::clone(MEM_ROOT *mem_root) +{ + ha_maria *new_handler= static_cast (handler::clone(mem_root)); + if (new_handler) + new_handler->file->state= file->state; + return new_handler; +} + + static const char *ha_maria_exts[]= { MARIA_NAME_IEXT, @@ -355,7 +366,11 @@ int ha_maria::write_row(byte * buf) or a new row, then update the auto_increment value in the record. */ if (table->next_number_field && buf == table->record[0]) - update_auto_increment(); + { + int error; + if ((error= update_auto_increment())) + return error; + } return maria_write(file, buf); } @@ -1818,20 +1833,28 @@ bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, return COMPATIBLE_DATA_YES; } -handlerton maria_hton; - -static int ha_maria_init() +extern int maria_panic(enum ha_panic_function flag); +int maria_panic(handlerton *hton, ha_panic_function flag) { - maria_hton.state=SHOW_OPTION_YES; - maria_hton.db_type=DB_TYPE_MARIA; - maria_hton.create=maria_create_handler; - maria_hton.panic=maria_panic; - maria_hton.flags=HTON_CAN_RECREATE; + return maria_panic(flag); +} + +static int ha_maria_init(void *p) +{ + handlerton *maria_hton; + + maria_hton= (handlerton *)p; + maria_hton->state= SHOW_OPTION_YES; + maria_hton->db_type= DB_TYPE_MARIA; + maria_hton->create= maria_create_handler; + maria_hton->panic= maria_panic; + /* TODO: decide if we support Maria being used for log tables */ + maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; return test(maria_init()); } struct st_mysql_storage_engine maria_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION, &maria_hton }; +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; mysql_declare_plugin(maria) { @@ -1840,9 +1863,12 @@ mysql_declare_plugin(maria) "Maria", "MySQL AB", "Traditional transactional MySQL tables", + PLUGIN_LICENSE_GPL, ha_maria_init, /* Plugin Init */ NULL, /* Plugin Deinit */ 0x0100, /* 1.0 */ - 0 + NULL, /* status variables */ + NULL, /* system variables */ + NULL /* config options */ } mysql_declare_plugin_end; diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index cdd2305f6a1..e4edcc80982 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -42,9 +42,9 @@ class ha_maria :public handler int repair(THD * thd, HA_CHECK ¶m, bool optimize); public: - ha_maria(TABLE_SHARE * table_arg); - ~ha_maria() - {} + ha_maria(handlerton *hton, TABLE_SHARE * table_arg); + ~ha_maria() {} + handler *clone(MEM_ROOT *mem_root); const char *table_type() const { return "MARIA"; } const char *index_type(uint key_number); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 624df8a7881..03802ba6989 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -1371,8 +1371,9 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, param->temp_filename); goto err; } - if (maria_filecopy(param,new_file,info->dfile,0L,new_header_length, - "datafile-header")) + if (new_header_length && + maria_filecopy(param,new_file,info->dfile,0L,new_header_length, + "datafile-header")) goto err; info->s->state.dellink= HA_OFFSET_ERROR; info->rec_cache.file=new_file; @@ -2056,8 +2057,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, param->temp_filename); goto err; } - if (maria_filecopy(param, new_file,info->dfile,0L,new_header_length, - "datafile-header")) + if (new_header_length && + maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) goto err; if (param->testflag & T_UNPACK) { @@ -2450,8 +2452,9 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, param->temp_filename); goto err; } - if (maria_filecopy(param, new_file,info->dfile,0L,new_header_length, - "datafile-header")) + if (new_header_length && + maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) goto err; if (param->testflag & T_UNPACK) { diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index d1ad9edbed5..8849c89e30c 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -442,7 +442,7 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *k else { DBUG_PRINT("test",("Inserting of key when deleting")); - if (_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, &tmp)) goto err; ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff, diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 318bbe341e4..476e8f694fc 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -19,7 +19,7 @@ #include "maria_def.h" #include -static int maria_inited= 0; +my_bool maria_inited= FALSE; pthread_mutex_t THR_LOCK_maria; /* @@ -40,7 +40,7 @@ int maria_init(void) { if (!maria_inited) { - maria_inited= 1; + maria_inited= TRUE; pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); } return 0; @@ -51,7 +51,7 @@ void maria_end(void) { if (maria_inited) { - maria_inited= 0; + maria_inited= FALSE; ft_free_stopwords(); pthread_mutex_destroy(&THR_LOCK_maria); } diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index adb4b03bebe..5689d57f2a5 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -183,6 +183,21 @@ int maria_lock_database(MARIA_HA *info, int lock_type) break; /* Impossible */ } } +#ifdef __WIN__ + else + { + /* + Check for bad file descriptors if this table is part + of a merge union. Failing to capture this may cause + a crash on windows if the table is renamed and + later on referenced by the merge table. + */ + if( info->owned_by_merge && (info->s)->kfile < 0 ) + { + error = HA_ERR_NO_SUCH_TABLE; + } + } +#endif pthread_mutex_unlock(&share->intern_lock); DBUG_RETURN(error); } /* maria_lock_database */ diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index eb99e299f9a..3be893f39f8 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -231,11 +231,19 @@ my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys) { for (i=0 ; i < share->base.keys ; i++) { - share->keyinfo[i].keylength+=(uint16) diff_length; - share->keyinfo[i].minlength+=(uint16) diff_length; - share->keyinfo[i].maxlength+=(uint16) diff_length; - share->keyinfo[i].seg[share->keyinfo[i].keysegs].length= - (uint16) rec_reflength; + MARIA_KEYDEF *keyinfo= &share->keyinfo[i]; + keyinfo->keylength+= (uint16) diff_length; + keyinfo->minlength+= (uint16) diff_length; + keyinfo->maxlength+= (uint16) diff_length; + keyinfo->seg[keyinfo->flag & HA_FULLTEXT ? + FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength; + } + if (share->ft2_keyinfo.seg) + { + MARIA_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo; + ft2_keyinfo->keylength+= (uint16) diff_length; + ft2_keyinfo->minlength+= (uint16) diff_length; + ft2_keyinfo->maxlength+= (uint16) diff_length; } } diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c index 90239b3943b..c1312cb1e77 100644 --- a/storage/maria/ma_panic.c +++ b/storage/maria/ma_panic.c @@ -44,6 +44,8 @@ int maria_panic(enum ha_panic_function flag) MARIA_HA *info; DBUG_ENTER("maria_panic"); + if (!maria_inited) + DBUG_RETURN(0); pthread_mutex_lock(&THR_LOCK_maria); for (list_element=maria_open_list ; list_element ; list_element=next_open) { diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index 2cb54a73b15..7338c96482f 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -93,29 +93,42 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len maria_read_vec[search_flag], info->s->state.key_root[inx])) { /* - If we are searching for an exact key (including the data pointer) - and this was added by an concurrent insert, - then the result is "key not found". + If we searching for a partial key (or using >, >=, < or <=) and + the data is outside of the data file, we need to continue searching + for the first key inside the data file */ - if ((search_flag == HA_READ_KEY_EXACT) && - (info->lastpos >= info->state->data_file_length)) + if (info->lastpos >= info->state->data_file_length && + (search_flag != HA_READ_KEY_EXACT || + last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) { - my_errno= HA_ERR_KEY_NOT_FOUND; - info->lastpos= HA_OFFSET_ERROR; - } - else while (info->lastpos >= info->state->data_file_length) - { - /* - Skip rows that are inserted by other threads since we got a lock - Note that this can only happen if we are not searching after an - exact key, because the keys are sorted according to position - */ - - if (_ma_search_next(info, keyinfo, info->lastkey, - info->lastkey_length, - maria_readnext_vec[search_flag], - info->s->state.key_root[inx])) - break; + do + { + uint not_used[2]; + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + full length exact key, because the keys are sorted + according to position + */ + if (_ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) + break; + /* + Check that the found key does still match the search. + _ma_search_next() delivers the next key regardless of its + value. + */ + if (search_flag == HA_READ_KEY_EXACT && + ha_key_cmp(keyinfo->seg, key_buff, info->lastkey, use_key_length, + SEARCH_FIND, not_used)) + { + my_errno= HA_ERR_KEY_NOT_FOUND; + info->lastpos= HA_OFFSET_ERROR; + break; + } + } while (info->lastpos >= info->state->data_file_length); } } } diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index c04a4a51eca..c56a26fefff 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -162,13 +162,13 @@ int maria_write(MARIA_HA *info, byte *record) /* Update status of the table. We need to do so after each row write for the log tables, as we want the new row to become visible to - other threads as soon as possible. We lock mutex here to follow - pthread memory visibility rules. + other threads as soon as possible. We don't lock mutex here + (as it is required by pthread memory visibility rules) as (1) it's + not critical to use outdated share->is_log_table value (2) locking + mutex here for every write is too expensive. */ - pthread_mutex_lock(&share->intern_lock); if (share->is_log_table) _ma_update_status((void*) info); - pthread_mutex_unlock(&share->intern_lock); allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(0); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index e0ba4bdb406..506bdbc71ca 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -305,7 +305,14 @@ struct st_maria_info my_bool page_changed; /* If info->buff has to be reread for rnext */ my_bool buff_used; + /* + TODO: decide if we will have Maria-MERGE tables, and if no, + remove some members here. + */ my_bool once_flags; /* For MARIAMRG */ +#ifdef __WIN__ + my_bool owned_by_merge; /* This Maria table is part of a merge union */ +#endif #ifdef THREAD THR_LOCK_DATA lock; #endif @@ -431,6 +438,7 @@ extern LIST *maria_open_list; extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; extern uint maria_quick_table_bits; +extern my_bool maria_inited; /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c index b840072aed0..4e0b4bf5ce3 100644 --- a/storage/maria/maria_ftdump.c +++ b/storage/maria/maria_ftdump.c @@ -128,7 +128,6 @@ int main(int argc,char *argv[]) if (count || stats) { - doc_cnt++; if (strcmp(buf, buf2)) { if (*buf2) @@ -153,6 +152,7 @@ int main(int argc,char *argv[]) keylen2=keylen; doc_cnt=0; } + doc_cnt+= (subkeys >= 0 ? 1 : -subkeys); } if (dump) { @@ -168,7 +168,6 @@ int main(int argc,char *argv[]) if (count || stats) { - doc_cnt++; if (*buf2) { uniq++; From c2872bafde6d6ec2444c293f7a8aa397eb1dbb59 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 13 Oct 2006 11:37:27 +0200 Subject: [PATCH 37/95] push for trnman review (lockmanager still fails unit tests) BitKeeper/deleted/.del-Makefile.am~4375ae3d4de2bdf0: Delete: unittest/maria/Makefile.am configure.in: silence up configure warnings, don't generate unittest/maria/Makefile include/atomic/nolock.h: s/LOCK/LOCK_prefix/ include/atomic/x86-gcc.h: s/LOCK/LOCK_prefix/ include/atomic/x86-msvc.h: s/LOCK/LOCK_prefix/ include/lf.h: pin asserts, renames include/my_atomic.h: move cleanup include/my_bit.h: s/uint/uint32/ mysys/lf_dynarray.c: style fixes, split for() in two, remove if()s mysys/lf_hash.c: renames, minor fixes mysys/my_atomic.c: run-time assert -> compile-time assert storage/maria/Makefile.am: lockman here storage/maria/unittest/Makefile.am: new unit tests storage/maria/unittest/trnman-t.c: lots of changes storage/maria/lockman.c: many changes: second meaning of "blocker" portability: s/gettimeofday/my_getsystime/ move mutex/cond out of LOCK_OWNER - it creates a race condition that will be fixed in a separate changeset increment lm->count for every element, not only for distinct ones - because we cannot decrease it for distinct elements only :( storage/maria/lockman.h: move mutex/cond out of LOCK_OWNER storage/maria/trnman.c: move mutex/cond out of LOCK_OWNER atomic-ops to access short_trid_to_trn[] storage/maria/trnman.h: move mutex/cond out of LOCK_OWNER storage/maria/unittest/lockman-t.c: unit stress test --- configure.in | 6 +- include/atomic/nolock.h | 4 +- include/atomic/x86-gcc.h | 18 +- include/atomic/x86-msvc.h | 10 +- include/lf.h | 14 +- include/my_atomic.h | 10 +- include/my_bit.h | 4 +- mysys/lf_dynarray.c | 92 ++-- mysys/lf_hash.c | 41 +- mysys/my_atomic.c | 2 +- storage/maria/Makefile.am | 4 +- storage/maria/lockman.c | 681 +++++++++++++++++++++++++++++ storage/maria/lockman.h | 73 ++++ storage/maria/trnman.c | 332 ++++++++++++++ storage/maria/trnman.h | 48 ++ storage/maria/trxman.c | 258 ----------- storage/maria/trxman.h | 28 -- storage/maria/unittest/Makefile.am | 2 +- storage/maria/unittest/lockman-t.c | 246 +++++++++++ storage/maria/unittest/trnman-t.c | 177 ++++++++ unittest/maria/Makefile.am | 12 - unittest/maria/trxman-t.c | 137 ------ 22 files changed, 1653 insertions(+), 546 deletions(-) create mode 100644 storage/maria/lockman.c create mode 100644 storage/maria/lockman.h create mode 100644 storage/maria/trnman.c create mode 100644 storage/maria/trnman.h delete mode 100644 storage/maria/trxman.c delete mode 100644 storage/maria/trxman.h create mode 100644 storage/maria/unittest/lockman-t.c create mode 100644 storage/maria/unittest/trnman-t.c delete mode 100644 unittest/maria/Makefile.am delete mode 100644 unittest/maria/trxman-t.c diff --git a/configure.in b/configure.in index 61ae233be06..7e997f1054c 100644 --- a/configure.in +++ b/configure.in @@ -2260,9 +2260,9 @@ AC_ARG_WITH(man, if test "$with_man" = "yes" then man_dirs="man" - man1_files=`ls -1 $srcdir/man/*.1 | sed -e 's;^.*man/;;'` + man1_files=`ls -1 $srcdir/man/*.1 2>/dev/null| sed -e 's;^.*man/;;'` man1_files=`echo $man1_files` - man8_files=`ls -1 $srcdir/man/*.8 | sed -e 's;^.*man/;;'` + man8_files=`ls -1 $srcdir/man/*.8 2>/dev/null| sed -e 's;^.*man/;;'` man8_files=`echo $man8_files` else man_dirs="" @@ -2481,7 +2481,7 @@ AC_SUBST(MAKE_BINARY_DISTRIBUTION_OPTIONS) # Output results AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl - unittest/mysys/Makefile unittest/examples/Makefile unittest/maria/Makefile dnl + unittest/mysys/Makefile unittest/examples/Makefile dnl strings/Makefile regex/Makefile storage/Makefile dnl man/Makefile BUILD/Makefile vio/Makefile dnl libmysql/Makefile client/Makefile dnl diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index 21f41484ac9..a696e008f03 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -17,9 +17,9 @@ #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) # ifdef MY_ATOMIC_MODE_DUMMY -# define LOCK "" +# define LOCK_prefix "" # else -# define LOCK "lock" +# define LOCK_prefix "lock" # endif # ifdef __GNUC__ diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index 3f0a82a9400..0be8fdf9244 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -22,15 +22,15 @@ #ifdef __x86_64__ # ifdef MY_ATOMIC_NO_XADD -# define MY_ATOMIC_MODE "gcc-amd64" LOCK "-no-xadd" +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd" # else -# define MY_ATOMIC_MODE "gcc-amd64" LOCK +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix # endif #else # ifdef MY_ATOMIC_NO_XADD -# define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd" # else -# define MY_ATOMIC_MODE "gcc-x86" LOCK +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix # endif #endif @@ -41,12 +41,12 @@ #ifndef MY_ATOMIC_NO_XADD #define make_atomic_add_body(S) \ - asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) + asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) #endif #define make_atomic_swap_body(S) \ - asm volatile ("; xchg %0, %1;" : "+r" (v) , "+m" (*a)) + asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) #define make_atomic_cas_body(S) \ - asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \ + asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set)) #ifdef MY_ATOMIC_MODE_DUMMY @@ -55,11 +55,11 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ ret=0; \ - asm volatile (LOCK "; cmpxchg %2, %0" \ + asm volatile (LOCK_prefix "; cmpxchg %2, %0" \ : "+m" (*a), "+a" (ret): "r" (ret)) #define make_atomic_store_body(S) \ asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h index d4024a854fb..8f3e55aaed7 100644 --- a/include/atomic/x86-msvc.h +++ b/include/atomic/x86-msvc.h @@ -26,19 +26,19 @@ #ifndef _atomic_h_cleanup_ #define _atomic_h_cleanup_ "atomic/x86-msvc.h" -#define MY_ATOMIC_MODE "msvc-x86" LOCK +#define MY_ATOMIC_MODE "msvc-x86" LOCK_prefix #define make_atomic_add_body(S) \ _asm { \ _asm mov reg_ ## S, v \ - _asm LOCK xadd *a, reg_ ## S \ + _asm LOCK_prefix xadd *a, reg_ ## S \ _asm movzx v, reg_ ## S \ } #define make_atomic_cas_body(S) \ _asm { \ _asm mov areg_ ## S, *cmp \ _asm mov reg2_ ## S, set \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov *cmp, areg_ ## S \ _asm setz al \ _asm movzx ret, al \ @@ -56,13 +56,13 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ _asm { \ _asm mov areg_ ## S, 0 \ _asm mov reg2_ ## S, areg_ ## S \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov ret, areg_ ## S \ } #define make_atomic_store_body(S) \ diff --git a/include/lf.h b/include/lf.h index 6a5047f6052..4c6765b2d40 100644 --- a/include/lf.h +++ b/include/lf.h @@ -88,8 +88,8 @@ nolock_wrap(lf_dynarray_iterate, int, pin manager for memory allocator */ -#define LF_PINBOX_PINS 3 -#define LF_PURGATORY_SIZE 11 +#define LF_PINBOX_PINS 4 +#define LF_PURGATORY_SIZE 10 typedef void lf_pinbox_free_func(void *, void *); @@ -112,9 +112,9 @@ typedef struct { -sizeof(void *)*(LF_PINBOX_PINS+LF_PURGATORY_SIZE+1)]; } LF_PINS; -#define lf_lock_by_pins(PINS) \ +#define lf_rwlock_by_pins(PINS) \ my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock) -#define lf_unlock_by_pins(PINS) \ +#define lf_rwunlock_by_pins(PINS) \ my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinstack.lock) /* @@ -139,11 +139,13 @@ typedef struct { #define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL) #define lf_pin(PINS, PIN, ADDR) \ do { \ - lf_lock_by_pins(PINS); \ + lf_rwlock_by_pins(PINS); \ _lf_pin(PINS, PIN, ADDR); \ - lf_unlock_by_pins(PINS); \ + lf_rwunlock_by_pins(PINS); \ } while (0) #define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) +#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) +#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN]==0) void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, void * free_func_arg); diff --git a/include/my_atomic.h b/include/my_atomic.h index d3e4e0055d3..921b55e68a2 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -118,6 +118,11 @@ make_atomic_swap(16) make_atomic_swap(32) make_atomic_swap(ptr) +#ifdef _atomic_h_cleanup_ +#include _atomic_h_cleanup_ +#undef _atomic_h_cleanup_ +#endif + #undef make_atomic_add #undef make_atomic_cas #undef make_atomic_load @@ -130,11 +135,6 @@ make_atomic_swap(ptr) #undef make_atomic_swap_body #undef intptr -#ifdef _atomic_h_cleanup_ -#include _atomic_h_cleanup_ -#undef _atomic_h_cleanup_ -#endif - #ifndef LF_BACKOFF #define LF_BACKOFF (1) #endif diff --git a/include/my_bit.h b/include/my_bit.h index 71bbe2d4ba3..58e8bb39683 100644 --- a/include/my_bit.h +++ b/include/my_bit.h @@ -88,7 +88,7 @@ STATIC_INLINE uint32 my_clear_highest_bit(uint32 v) return v & w; } -STATIC_INLINE uint32 my_reverse_bits(uint key) +STATIC_INLINE uint32 my_reverse_bits(uint32 key) { return (_my_bits_reverse_table[ key & 255] << 24) | @@ -101,7 +101,7 @@ STATIC_INLINE uint32 my_reverse_bits(uint key) extern uint my_bit_log2(ulong value); extern uint32 my_round_up_to_next_power(uint32 v); uint32 my_clear_highest_bit(uint32 v); -uint32 my_reverse_bits(uint key); +uint32 my_reverse_bits(uint32 key); extern uint my_count_bits(ulonglong v); extern uint my_count_bits_ushort(ushort v); #endif diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index 0fa04ab095c..ade1c28d51c 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -38,7 +38,7 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) { bzero(array, sizeof(*array)); - array->size_of_element=element_size; + array->size_of_element= element_size; my_atomic_rwlock_init(&array->lock); } @@ -49,7 +49,7 @@ static void recursive_free(void **alloc, int level) if (level) { int i; - for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) recursive_free(alloc[i], level-1); my_free((void *)alloc, MYF(0)); } @@ -60,13 +60,13 @@ static void recursive_free(void **alloc, int level) void lf_dynarray_destroy(LF_DYNARRAY *array) { int i; - for (i=0; i < LF_DYNARRAY_LEVELS; i++) + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) recursive_free(array->level[i], i); my_atomic_rwlock_destroy(&array->lock); bzero(array, sizeof(*array)); } -static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= +static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, @@ -77,41 +77,32 @@ static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) { - void * ptr, * volatile * ptr_ptr=0; + void * ptr, * volatile * ptr_ptr= 0; int i; - for (i=3; i > 0; i--) + for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_level[i]; + for (; i > 0; i--) { - if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + if (!(ptr= *ptr_ptr)) { - if (!ptr_ptr) - { - ptr_ptr=&array->level[i]; - idx-= dynarray_idxes_in_level[i]; - } - ptr=*ptr_ptr; - if (!ptr) - { - void *alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), - MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) - return(NULL); - if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) - ptr= alloc; - else - my_free(alloc, MYF(0)); - } - ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; - idx%= dynarray_idxes_in_level[i]; + void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; } - if (!ptr_ptr) - ptr_ptr=&array->level[0]; - ptr=*ptr_ptr; - if (!ptr) + if (!(ptr= *ptr_ptr)) { void *alloc, *data; - alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + max(array->size_of_element, sizeof(void *)), MYF(MY_WME|MY_ZEROFILL)); if (!alloc) @@ -123,7 +114,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) if (mod) data+= array->size_of_element - mod; } - ((void **)data)[-1]=alloc; /* free() will need the original pointer */ + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ if (my_atomic_casptr(ptr_ptr, &ptr, data)) ptr= data; else @@ -134,29 +125,20 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) { - void * ptr, * volatile * ptr_ptr=0; + void * ptr, * volatile * ptr_ptr= 0; int i; - for (i=3; i > 0; i--) + for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_level[i]; + for (; i > 0; i--) { - if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) - { - if (!ptr_ptr) - { - ptr_ptr=&array->level[i]; - idx-= dynarray_idxes_in_level[i]; - } - ptr=*ptr_ptr; - if (!ptr) - return(NULL); - ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; - idx %= dynarray_idxes_in_level[i]; - } + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx %= dynarray_idxes_in_prev_level[i]; } - if (!ptr_ptr) - ptr_ptr=&array->level[0]; - ptr=*ptr_ptr; - if (!ptr) + if (!(ptr= *ptr_ptr)) return(NULL); return ptr + array->size_of_element * idx; } @@ -169,8 +151,8 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, return 0; if (!level) return func(ptr, arg); - for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) - if ((res=recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) return res; return 0; } @@ -178,8 +160,8 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) { int i, res; - for (i=0; i < LF_DYNARRAY_LEVELS; i++) - if ((res=recursive_iterate(array, array->level[i], i, func, arg))) + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) return res; return 0; } diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 736c3ea4887..a0425e89556 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -19,6 +19,7 @@ TODO try to get rid of dummy nodes ? + for non-unique hash, count only _distinct_ values */ #include #include @@ -51,7 +52,7 @@ typedef struct { cursor is positioned in either case pins[0..2] are used, they are NOT removed on return */ -static int lfind(intptr volatile *head, uint32 hashnr, +static int lfind(LF_SLIST * volatile *head, uint32 hashnr, const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; @@ -60,7 +61,7 @@ static int lfind(intptr volatile *head, uint32 hashnr, intptr link; retry: - cursor->prev=head; + cursor->prev=(intptr *)head; do { cursor->curr=PTR(*cursor->prev); _lf_pin(pins,1,cursor->curr); @@ -112,7 +113,7 @@ retry: /* RETURN 0 - inserted - not 0 - a pointer to a conflict + not 0 - a pointer to a conflict (not pinned and thus unusable) NOTE it uses pins[0..2], on return all pins are removed. @@ -125,17 +126,17 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, do { - if (lfind((intptr*)head, node->hashnr, node->key, node->keylen, + if (lfind(head, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) - res=0; + res=0; /* duplicate found */ else { node->link=(intptr)cursor.curr; assert(node->link != (intptr)node); assert(cursor.prev != &node->link); if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) - res=1; + res=1; /* inserted ok */ } } while (res == -1); _lf_unpin(pins, 0); @@ -159,7 +160,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, do { - if (!lfind((intptr *)head, hashnr, key, keylen, &cursor, pins)) + if (!lfind(head, hashnr, key, keylen, &cursor, pins)) res= 1; else if (my_atomic_casptr((void **)&(cursor.curr->link), @@ -169,7 +170,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, (void **)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else - lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + lfind(head, hashnr, key, keylen, &cursor, pins); res= 0; } } while (res == -1); @@ -191,7 +192,7 @@ static LF_SLIST *lsearch(LF_SLIST * volatile *head, uint32 hashnr, const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + int res=lfind(head, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); @@ -214,7 +215,7 @@ static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) return nr1 & INT_MAX32; } -#define MAX_LOAD 1 +#define MAX_LOAD 1.0 static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, @@ -262,7 +263,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) uint csize, bucket, hashnr; LF_SLIST *node, * volatile *el; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); @@ -275,13 +276,13 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) if (linsert(el, node, pins, hash->flags)) { _lf_alloc_free(pins, node); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 1; } csize= hash->size; if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) my_atomic_cas32(&hash->size, &csize, csize*2); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 0; } @@ -298,17 +299,17 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) { - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 1; } my_atomic_add32(&hash->count, -1); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 0; } @@ -322,13 +323,13 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); - lf_unlock_by_pins(pins); - return found+1; + lf_rwunlock_by_pins(pins); + return found ? found+1 : 0; } static char *dummy_key=""; @@ -347,7 +348,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, dummy->keylen=0; if ((cur= linsert(el, dummy, pins, 0))) { - _lf_alloc_free(pins, dummy); + my_free((void *)dummy, MYF(0)); dummy= cur; } my_atomic_casptr((void **)node, (void **)&tmp, dummy); diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 2908a44961a..fbeb3d63bef 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -35,7 +35,7 @@ */ int my_atomic_initialize() { - DBUG_ASSERT(sizeof(intptr) == sizeof(void *)); + char assert_the_size[sizeof(intptr) == sizeof(void *) ? 1 : -1]; /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 54fd70d7ae5..4f348cd2894 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -53,7 +53,7 @@ maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ - ma_ft_eval.h trxman.h \ + ma_ft_eval.h trnman.h lockman.h \ ma_control_file.h ha_maria.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ @@ -108,7 +108,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_keycache.c ma_preload.c ma_ft_parser.c \ ma_ft_update.c ma_ft_boolean_search.c \ ma_ft_nlq_search.c ft_maria.c ma_sort.c \ - ha_maria.cc trxman.c \ + ha_maria.cc trnman.c lockman.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ ma_sp_key.c ma_control_file.c CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c new file mode 100644 index 00000000000..e8ddbd1a25a --- /dev/null +++ b/storage/maria/lockman.c @@ -0,0 +1,681 @@ +// TODO lock escalation, instant duration locks +// automatically place S instead of LS if possible +/* + TODO optimization: table locks - they have completely + different characteristics. long lists, few distinct resources - + slow to scan, [possibly] high retry rate +*/ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include +#include +#include +#include +#include "lockman.h" + +/* + Lock compatibility matrix. + + It's asymmetric. Read it as "Somebody has the lock , can I set the lock ?" + + ') Though you can take LS lock while somebody has S lock, it makes no + sense - it's simpler to take S lock too. + + ") Strictly speaking you can take LX lock while somebody has S lock. + But in this case you lock no rows, because all rows are locked by this + somebody. So we prefer to define that LX cannot be taken when S + exists. Same about LX and X. + + 1 - compatible + 0 - incompatible + -1 - "impossible", so that we can assert the impossibility. +*/ +static int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ + { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { X, X, X, X, X, X, X, X, X, X}, /* X */ + { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +#define REPEAT_ONCE_MORE 0 +#define OK_TO_PLACE_THE_LOCK 1 +#define OK_TO_PLACE_THE_REQUEST 2 +#define ALREADY_HAVE_THE_LOCK 4 +#define ALREADY_HAVE_THE_REQUEST 8 +#define PLACE_NEW_DISABLE_OLD 16 +#define REQUEST_NEW_DISABLE_OLD 32 +#define RESOURCE_WAS_UNLOCKED 64 + +#define NEED_TO_WAIT (OK_TO_PLACE_THE_REQUEST | ALREADY_HAVE_THE_REQUEST |\ + REQUEST_NEW_DISABLE_OLD) +#define ALREADY_HAVE (ALREADY_HAVE_THE_LOCK | ALREADY_HAVE_THE_REQUEST) +#define LOCK_UPGRADE (PLACE_NEW_DISABLE_OLD | REQUEST_NEW_DISABLE_OLD) + + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock , + what value should be returned for ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. + I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but cannot happen in row locks, only in table locks (S,X), or + lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +LF_REQUIRE_PINS(4); + +typedef struct lockman_lock { + uint64 resource; + struct lockman_lock *lonext; + intptr volatile link; + uint32 hashnr; +//#warning TODO - remove hashnr from LOCK + uint16 loid; + uchar lock; /* sizeof(uchar) <= sizeof(enum) */ + uchar flags; +} LOCK; + +#define IGNORE_ME 1 +#define UPGRADED 2 + +typedef struct { + intptr volatile *prev; + LOCK *curr, *next; + LOCK *blocker, *upgrade_from; +} CURSOR; + +#define PTR(V) (LOCK *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + NOTE + cursor is positioned in either case + pins[0..3] are used, they are NOT removed on return +*/ +static int lockfind(LOCK * volatile *head, LOCK *node, + CURSOR *cursor, LF_PINS *pins) +{ + uint32 hashnr, cur_hashnr; + uint64 resource, cur_resource; + intptr link; + my_bool cur_active, compatible, upgrading, prev_active; + enum lock_type lock, prev_lock, cur_lock; + uint16 loid, cur_loid; + int upgraded_pairs, cur_flags, flags; + + hashnr= node->hashnr; + resource= node->resource; + lock= node->lock; + loid= node->loid; + flags= node->flags; + +retry: + cursor->prev= (intptr *)head; + prev_lock= N; + cur_active= TRUE; + compatible= TRUE; + upgrading= FALSE; + cursor->blocker= cursor->upgrade_from= 0; + _lf_unpin(pins, 3); + upgraded_pairs= 0; + do { + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins,1,cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + break; + do { + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_resource= cursor->curr->resource; + cur_lock= cursor->curr->lock; + cur_loid= cursor->curr->loid; + cur_flags= cursor->curr->flags; + if (*cursor->prev != (intptr)cursor->curr) + { + LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr > hashnr || + (cur_hashnr == hashnr && cur_resource >= resource)) + { + if (cur_hashnr > hashnr || cur_resource > resource) + { + if (upgraded_pairs != 0) + goto retry; + break; + } + /* ok, we have a lock for this resource */ + DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0); + DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0); + if (cur_flags & UPGRADED) + upgraded_pairs++; + if ((cur_flags & IGNORE_ME) && ! (flags & IGNORE_ME)) + { + DBUG_ASSERT(cur_active); + upgraded_pairs--; + if (cur_loid == loid) + cursor->upgrade_from= cursor->curr; + } + else + { + prev_active= cur_active; + cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; + if (upgrading && !cur_active && upgraded_pairs == 0) + break; + if (prev_active && !cur_active) + { + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + if (cur_loid == loid) + { + /* we already have a lock on this resource */ + DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N); + DBUG_ASSERT(!upgrading); /* can happen only once */ + if (lock_combining_matrix[cur_lock][lock] == cur_lock) + { + /* new lock is compatible */ + return cur_active ? ALREADY_HAVE_THE_LOCK + : ALREADY_HAVE_THE_REQUEST; + } + /* not compatible, upgrading */ + upgrading= TRUE; + cursor->upgrade_from= cursor->curr; + } + else + { + if (!lock_compatibility_matrix[cur_lock][lock]) + { + compatible= FALSE; + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + prev_lock= lock_combining_matrix[prev_lock][cur_lock]; + DBUG_ASSERT(prev_lock != N); + } + } + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } + /* + either the end of lock list - no more locks for this resource, + or upgrading and the end of active lock list + */ + if (upgrading) + { + if (compatible) + return PLACE_NEW_DISABLE_OLD; + else + return REQUEST_NEW_DISABLE_OLD; + } + if (cur_active && compatible) + { + /* + either no locks for this resource or all are compatible. + ok to place the lock in any case. + */ + return prev_lock == N ? RESOURCE_WAS_UNLOCKED + : OK_TO_PLACE_THE_LOCK; + } + /* we have a lock conflict. ok to place a lock request. And wait */ + return OK_TO_PLACE_THE_REQUEST; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res != ALREADY_HAVE_THE_REQUEST); + if (!(res & ALREADY_HAVE)) + { + if (res & LOCK_UPGRADE) + { + node->flags|= UPGRADED; + node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock]; + } + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); + DBUG_ASSERT(cursor.prev != &node->link); + if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + res= REPEAT_ONCE_MORE; + if (res & LOCK_UPGRADE) + cursor.upgrade_from->flags|= IGNORE_ME; + } + + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + /* + note that cursor.curr is NOT pinned on return. + this is ok as it's either a dummy node for initialize_bucket + and dummy nodes don't need pinning, + or it's a lock of the same transaction for lockman_getlock, + and it cannot be removed by another thread + */ + *blocker= cursor.blocker ? cursor.blocker : cursor.curr; + return res; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockpeek(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + res= lockfind(head, node, &cursor, pins); + + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + if (blocker) + *blocker= cursor.blocker; + return res; +} + +/* + NOTE + it uses pins[0..3], on return all pins are removed. + + One _must_ have the lock (or request) to call this +*/ +static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res & ALREADY_HAVE); + + if (cursor.upgrade_from) + cursor.upgrade_from->flags&= ~IGNORE_ME; + + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lockfind(head, node, &cursor, pins); + } + else + res= REPEAT_ONCE_MORE; + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + _lf_unpin(pins, 3); + return res; +} + +void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout) +{ + lf_alloc_init(&lm->alloc,sizeof(LOCK)); + lf_dynarray_init(&lm->array, sizeof(LOCK **)); + lm->size= 1; + lm->count= 0; + lm->loid_to_lo= func; + lm->lock_timeout= timeout; +} + +void lockman_destroy(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_real_free(&lm->alloc, el); + else + my_free((void *)el, MYF(0)); + el= (LOCK *)next; + } + lf_alloc_destroy(&lm->alloc); + lf_dynarray_destroy(&lm->array); +} + +/* TODO: optimize it */ +#define MAX_LOAD 1 + +static void initialize_bucket(LOCKMAN *lm, LOCK * volatile *node, + uint bucket, LF_PINS *pins) +{ + int res; + uint parent= my_clear_highest_bit(bucket); + LOCK *dummy= (LOCK *)my_malloc(sizeof(LOCK), MYF(MY_WME)); + LOCK **tmp= 0, *cur; + LOCK * volatile *el= _lf_dynarray_lvalue(&lm->array, parent); + + if (*el == NULL && bucket) + initialize_bucket(lm, el, parent, pins); + dummy->hashnr= my_reverse_bits(bucket); + dummy->loid= 0; + dummy->lock= X; /* doesn't matter, in fact */ + dummy->resource= 0; + dummy->flags= 0; + res= lockinsert(el, dummy, pins, &cur); + DBUG_ASSERT(res & (ALREADY_HAVE_THE_LOCK | RESOURCE_WAS_UNLOCKED)); + if (res & ALREADY_HAVE_THE_LOCK) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} + +static inline uint calc_hash(uint64 resource) +{ + const uchar *pos= (uchar *)&resource; + ulong nr1= 1, nr2= 4, i; + for (i= 0; i < sizeof(resource) ; i++, pos++) + { + nr1^= (ulong) ((((uint) nr1 & 63)+nr2) * ((uint)*pos)) + (nr1 << 8); + nr2+= 3; + } + return nr1 & INT_MAX32; +} + +/* + RETURN + see enum lockman_getlock_result + NOTE + uses pins[0..3], they're removed on return +*/ +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock) +{ + int res; + uint csize, bucket, hashnr; + LOCK *node, * volatile *el, *blocker; + LF_PINS *pins= lo->pins; + enum lock_type old_lock; + + DBUG_ASSERT(lo->loid); + lf_rwlock_by_pins(pins); + node= (LOCK *)_lf_alloc_new(pins); + node->flags= 0; + node->lock= lock; + node->loid= lo->loid; + node->resource= resource; + hashnr= calc_hash(resource); + bucket= hashnr % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + node->hashnr= my_reverse_bits(hashnr) | 1; + res= lockinsert(el, node, pins, &blocker); + if (res & ALREADY_HAVE) + { + old_lock= blocker->lock; + _lf_assert_unpin(pins, 3); /* unpin should not be needed */ + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + res= getlock_result[old_lock][lock]; + DBUG_ASSERT(res); + return res; + } + /* a new value was added to the hash */ + csize= lm->size; + if ((my_atomic_add32(&lm->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&lm->size, &csize, csize*2); + node->lonext= lo->all_locks; + lo->all_locks= node; + for ( ; res & NEED_TO_WAIT; res= lockpeek(el, node, pins, &blocker)) + { + LOCK_OWNER *wait_for_lo; + ulonglong deadline; + struct timespec timeout; + + _lf_assert_pin(pins, 3); /* blocker must be pinned here */ + lf_rwunlock_by_pins(pins); + + wait_for_lo= lm->loid_to_lo(blocker->loid); + /* + now, this is tricky. blocker is not necessarily a LOCK + we're waiting for. If it's compatible with what we want, + then we're waiting for a lock that blocker is waiting for + (see two places where blocker is set in lockfind) + In the latter case, let's "dereference" it + */ + if (lock_compatibility_matrix[blocker->lock][lock]) + { + blocker= wait_for_lo->all_locks; + lf_pin(pins, 3, blocker); + if (blocker != wait_for_lo->all_locks) + { + lf_rwlock_by_pins(pins); + continue; + } + wait_for_lo= wait_for_lo->waiting_for; + } + + /* + note that the blocker transaction may have ended by now, + its LOCK_OWNER and short id were reused, so 'wait_for_lo' may point + to an unrelated - albeit valid - LOCK_OWNER + */ + if (!wait_for_lo) + { + /* blocker transaction has ended, short id was released */ + lf_rwlock_by_pins(pins); + continue; + } + /* + We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must + belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER, + if there're other active LOCK_OWNERs. + */ +#warning race condition here + pthread_mutex_lock(wait_for_lo->mutex); + if (DELETED(blocker->link)) + { + /* + blocker transaction was ended, or a savepoint that owned + the lock was rolled back. Either way - the lock was removed + */ + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + continue; + } + /* yuck. waiting */ + lo->waiting_for= wait_for_lo; + + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + do + { + pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout); + } while (!DELETED(blocker->link) && my_getsystime() < deadline); + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + if (!DELETED(blocker->link)) + { + /* + timeout. + note that we _don't_ release the lock request here. + Instead we're relying on the caller to abort the transaction, + and release all locks at once - see lockman_release_locks() + */ + lf_rwunlock_by_pins(pins); + return DIDNT_GET_THE_LOCK; + } + lo->waiting_for= 0; + } + _lf_assert_unpin(pins, 3); /* unpin should not be needed */ + lf_rwunlock_by_pins(pins); + return getlock_result[lock][lock]; +} + +/* + RETURN + 0 - deleted + 1 - didn't (not found) + NOTE + see lockdelete() for pin usage notes +*/ +int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) +{ + LOCK * volatile *el, *node; + uint bucket; + LF_PINS *pins= lo->pins; + + pthread_mutex_lock(lo->mutex); + lf_rwlock_by_pins(pins); + for (node= lo->all_locks; node; node= node->lonext) + { + bucket= calc_hash(node->resource) % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + lockdelete(el, node, pins); + my_atomic_add32(&lm->count, -1); + } + lf_rwunlock_by_pins(pins); + lo->all_locks= 0; + /* now signal all waiters */ + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + return 0; +} + +#ifdef MY_LF_EXTRA_DEBUG +/* + NOTE + the function below is NOT thread-safe !!! +*/ +static char *lock2str[]= +{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; +void print_lockhash(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + printf("hash: size=%u count=%u\n", lm->size, lm->count); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + printf("0x%08x { resource %llu, loid %u, lock %s", + el->hashnr, el->resource, el->loid, lock2str[el->lock]); + else + { + printf("0x%08x { dummy ", el->hashnr); + DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X); + } + if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); + if (el->flags & UPGRADED) printf(" UPGRADED"); + printf("}\n"); + el= (LOCK *)next; + } +} +#endif + diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h new file mode 100644 index 00000000000..a3c96786935 --- /dev/null +++ b/storage/maria/lockman.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _lockman_h +#define _lockman_h + +/* + N - "no lock", not a lock, used sometimes to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; + +struct lockman_lock; + +typedef struct st_lock_owner LOCK_OWNER; +struct st_lock_owner { + LF_PINS *pins; + struct lockman_lock *all_locks; + LOCK_OWNER *waiting_for; + pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid; +}; + +typedef LOCK_OWNER *loid_to_lo_func(uint16); +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ + uint lock_timeout; + loid_to_lo_func *loid_to_lo; +} LOCKMAN; + +enum lockman_getlock_result { + DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; + +void lockman_init(LOCKMAN *, loid_to_lo_func *, uint); +void lockman_destroy(LOCKMAN *); +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock); +int lockman_release_locks(LOCKMAN *, LOCK_OWNER *); + +#ifdef EXTRA_DEBUG +void print_lockhash(LOCKMAN *lm); +#endif + +#endif diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c new file mode 100644 index 00000000000..49f49a3e26b --- /dev/null +++ b/storage/maria/trnman.c @@ -0,0 +1,332 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#include +#include +#include +#include "trnman.h" + +uint trnman_active_transactions, trnman_allocated_transactions; + +static TRN active_list_min, active_list_max, + committed_list_min, committed_list_max, *pool; + +static pthread_mutex_t LOCK_trn_list; +static TrID global_trid_generator; + +static LF_HASH trid_to_trn; +static LOCKMAN maria_lockman; + +static TRN **short_trid_to_trn; +static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool; + +static byte *trn_get_hash_key(const byte *trn,uint* len, my_bool unused) +{ + *len= sizeof(TrID); + return (byte *) & ((*((TRN **)trn))->trid); +} + +static LOCK_OWNER *trnman_short_trid_to_TRN(uint16 short_trid) +{ + TRN *trn; + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + trn= my_atomic_loadptr((void **)&short_trid_to_trn[short_trid]); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + return (LOCK_OWNER *)trn; +} + +int trnman_init() +{ + pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); + active_list_max.trid= active_list_min.trid= 0; + active_list_max.min_read_from= ~0; + active_list_max.next= active_list_min.prev= 0; + active_list_max.prev= &active_list_min; + active_list_min.next= &active_list_max; + trnman_active_transactions= 0; + trnman_allocated_transactions= 0; + + committed_list_max.commit_trid= ~0; + committed_list_max.next= committed_list_min.prev= 0; + committed_list_max.prev= &committed_list_min; + committed_list_min.next= &committed_list_max; + + pool= 0; + global_trid_generator= 0; /* set later by recovery code */ + lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE, + 0, 0, trn_get_hash_key, 0); + my_atomic_rwlock_init(&LOCK_short_trid_to_trn); + my_atomic_rwlock_init(&LOCK_pool); + short_trid_to_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), + MYF(MY_WME|MY_ZEROFILL)); + if (!short_trid_to_trn) + return 1; + short_trid_to_trn--; /* min short_trid is 1 */ + + lockman_init(&maria_lockman, &trnman_short_trid_to_TRN, 10000); + + return 0; +} + +int trnman_destroy() +{ + DBUG_ASSERT(trid_to_trn.count == 0); + DBUG_ASSERT(trnman_active_transactions == 0); + DBUG_ASSERT(active_list_max.prev == &active_list_min); + DBUG_ASSERT(active_list_min.next == &active_list_max); + DBUG_ASSERT(committed_list_max.prev == &committed_list_min); + DBUG_ASSERT(committed_list_min.next == &committed_list_max); + while (pool) + { + TRN *trn= pool; + pool= pool->next; + DBUG_ASSERT(trn->locks.mutex == 0); + DBUG_ASSERT(trn->locks.cond == 0); + my_free((void *)trn, MYF(0)); + } + lf_hash_destroy(&trid_to_trn); + pthread_mutex_destroy(&LOCK_trn_list); + my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn); + my_atomic_rwlock_destroy(&LOCK_pool); + my_free((void *)(short_trid_to_trn+1), MYF(0)); + lockman_destroy(&maria_lockman); +} + +static TrID new_trid() +{ + DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); + safe_mutex_assert_owner(&LOCK_trn_list); + return ++global_trid_generator; +} + +static void set_short_trid(TRN *trn) +{ + int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX; + my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn); + for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */ + { + void *tmp= NULL; + if (short_trid_to_trn[i] == NULL && + my_atomic_casptr((void **)&short_trid_to_trn[i], &tmp, trn)) + break; + } + my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn); + trn->locks.loid= i; +} + +TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) +{ + TRN *trn; + + /* + see trnman_end_trn to see why we need a mutex here + + and as we have a mutex, we can as well do everything + under it - allocating a TRN, incrementing trnman_active_transactions, + setting trn->min_read_from. + + Note that all the above is fast. generating short_trid may be slow, + as it involves scanning a big array - so it's still done + outside of the mutex. + */ + + pthread_mutex_lock(&LOCK_trn_list); + trnman_active_transactions++; + + trn= pool; + my_atomic_rwlock_wrlock(&LOCK_pool); + while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, + (void *)trn->next)) + /* no-op */; + my_atomic_rwlock_wrunlock(&LOCK_pool); + + if (!trn) + { + trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME)); + if (!trn) + { + pthread_mutex_unlock(&LOCK_trn_list); + return 0; + } + trnman_allocated_transactions++; + } + + trn->min_read_from= active_list_min.next->trid; + + trn->trid= new_trid(); + trn->locks.loid= 0; + + trn->next= &active_list_max; + trn->prev= active_list_max.prev; + active_list_max.prev= trn->prev->next= trn; + pthread_mutex_unlock(&LOCK_trn_list); + + trn->pins= lf_hash_get_pins(&trid_to_trn); + + if (!trn->min_read_from) + trn->min_read_from= trn->trid; + + trn->locks.mutex= mutex; + trn->locks.cond= cond; + trn->commit_trid= 0; + trn->locks.waiting_for= 0; + trn->locks.all_locks= 0; + trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc); + + set_short_trid(trn); /* this must be the last! */ + + return trn; +} + +/* + remove a trn from the active list, + move to committed list, + set commit_trid + + TODO + integrate with log manager. That means: + a common "commit" mutex - forcing the log and setting commit_trid + must be done atomically (QQ how the heck it could be done with + group commit ???) XXX - why did I think it must be done atomically ? + + trid_to_trn, active_list_*, and committed_list_* can be + updated asyncronously. +*/ +void trnman_end_trn(TRN *trn, my_bool commit) +{ + int res; + TRN *free_me= 0; + LF_PINS *pins= trn->pins; + + pthread_mutex_lock(&LOCK_trn_list); + trn->next->prev= trn->prev; + trn->prev->next= trn->next; + + if (trn->prev == &active_list_min) + { + TRN *t; + for (t= committed_list_min.next; + t->commit_trid < active_list_min.next->min_read_from; + t= t->next) /* no-op */; + + if (t != committed_list_min.next) + { + free_me= committed_list_min.next; + committed_list_min.next= t; + t->prev->next= 0; + t->prev= &committed_list_min; + } + } + + if (commit && active_list_min.next != &active_list_max) + { + trn->commit_trid= global_trid_generator; + + trn->next= &committed_list_max; + trn->prev= committed_list_max.prev; + committed_list_max.prev= trn->prev->next= trn; + + res= lf_hash_insert(&trid_to_trn, pins, &trn); + DBUG_ASSERT(res == 0); + } + else + { + trn->next= free_me; + free_me= trn; + } + trnman_active_transactions--; + pthread_mutex_unlock(&LOCK_trn_list); + + lockman_release_locks(&maria_lockman, &trn->locks); + trn->locks.mutex= 0; + trn->locks.cond= 0; + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + my_atomic_storeptr((void **)&short_trid_to_trn[trn->locks.loid], 0); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + + + while (free_me) // XXX send them to the purge thread + { + int res; + TRN *t= free_me; + free_me= free_me->next; + + res= lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID)); + + trnman_free_trn(t); + } + + lf_hash_put_pins(pins); + lf_pinbox_put_pins(trn->locks.pins); +} + +/* + free a trn (add to the pool, that is) + note - we can never really free() a TRN if there's at least one + other running transaction - see, e.g., how lock waits are implemented + in lockman.c +*/ +void trnman_free_trn(TRN *trn) +{ + TRN *tmp= pool; + + my_atomic_rwlock_wrlock(&LOCK_pool); + do + { + /* + without volatile cast gcc-3.4.4 moved the assignment + down after the loop at -O2 + */ + *(TRN * volatile *)&(trn->next)= tmp; + } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn)); + my_atomic_rwlock_wrunlock(&LOCK_pool); +} + +/* + NOTE + here we access the hash in a lock-free manner. + It's safe, a 'found' TRN can never be freed/reused before we access it. + In fact, it cannot be freed before 'trn' ends, because a 'found' TRN + can only be removed from the hash when: + found->commit_trid < ALL (trn->min_read_from) + that is, at least + found->commit_trid < trn->min_read_from + but + found->trid >= trn->min_read_from + and + found->commit_trid > found->trid +*/ +my_bool trnman_can_read_from(TRN *trn, TrID trid) +{ + TRN **found; + my_bool can; + LF_REQUIRE_PINS(3); + + if (trid < trn->min_read_from) + return TRUE; + if (trid > trn->trid) + return FALSE; + + found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); + if (!found) + return FALSE; /* not in the hash of committed transactions = cannot read*/ + + can= (*found)->commit_trid < trn->trid; + lf_unpin(trn->pins, 2); + return can; +} + diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h new file mode 100644 index 00000000000..9470678f3b2 --- /dev/null +++ b/storage/maria/trnman.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _trnman_h +#define _trnman_h + +#include "lockman.h" + +typedef uint64 TrID; /* our TrID is 6 bytes */ +typedef struct st_transaction TRN; + +struct st_transaction +{ + LOCK_OWNER locks; + LF_PINS *pins; + TrID trid, min_read_from, commit_trid; + TRN *next, *prev; + /* Note! if locks.loid is 0, trn is NOT initialized */ +}; + +#define SHORT_TRID_MAX 65535 + +extern uint trnman_active_transactions, trnman_allocated_transactions; + +int trnman_init(void); +int trnman_destroy(void); +TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond); +void trnman_end_trn(TRN *trn, my_bool commit); +#define trnman_commit_trn(T) trnman_end_trn(T, TRUE) +#define trnman_abort_trn(T) trnman_end_trn(T, FALSE) +void trnman_free_trn(TRN *trn); +my_bool trnman_can_read_from(TRN *trn, TrID trid); + +#endif + diff --git a/storage/maria/trxman.c b/storage/maria/trxman.c deleted file mode 100644 index a3e746af9ca..00000000000 --- a/storage/maria/trxman.c +++ /dev/null @@ -1,258 +0,0 @@ - -#include -#include -#include -#include "trxman.h" - -TRX active_list_min, active_list_max, - committed_list_min, committed_list_max, *pool; - -pthread_mutex_t LOCK_trx_list; -uint trxman_active_transactions, trxman_allocated_transactions; -TrID global_trid_generator; - -TRX **short_id_to_trx; -my_atomic_rwlock_t LOCK_short_id_to_trx; - -LF_HASH trid_to_trx; - -static byte *trx_get_hash_key(const byte *trx,uint* len, my_bool unused) -{ - *len= sizeof(TrID); - return (byte *) & ((*((TRX **)trx))->trid); -} - -int trxman_init() -{ - pthread_mutex_init(&LOCK_trx_list, MY_MUTEX_INIT_FAST); - active_list_max.trid= active_list_min.trid= 0; - active_list_max.min_read_from=~0; - active_list_max.next= active_list_min.prev= 0; - active_list_max.prev= &active_list_min; - active_list_min.next= &active_list_max; - trxman_active_transactions= 0; - trxman_allocated_transactions= 0; - - committed_list_max.commit_trid= ~0; - committed_list_max.next= committed_list_min.prev= 0; - committed_list_max.prev= &committed_list_min; - committed_list_min.next= &committed_list_max; - - pool=0; - global_trid_generator=0; /* set later by recovery code */ - lf_hash_init(&trid_to_trx, sizeof(TRX*), LF_HASH_UNIQUE, - 0, 0, trx_get_hash_key, 0); - my_atomic_rwlock_init(&LOCK_short_id_to_trx); - short_id_to_trx=(TRX **)my_malloc(SHORT_ID_MAX*sizeof(TRX*), - MYF(MY_WME|MY_ZEROFILL)); - if (!short_id_to_trx) - return 1; - short_id_to_trx--; /* min short_id is 1 */ - - return 0; -} - -int trxman_destroy() -{ - DBUG_ASSERT(trid_to_trx.count == 0); - DBUG_ASSERT(trxman_active_transactions == 0); - DBUG_ASSERT(active_list_max.prev == &active_list_min); - DBUG_ASSERT(active_list_min.next == &active_list_max); - DBUG_ASSERT(committed_list_max.prev == &committed_list_min); - DBUG_ASSERT(committed_list_min.next == &committed_list_max); - while (pool) - { - TRX *tmp=pool->next; - my_free((void *)pool, MYF(0)); - pool=tmp; - } - lf_hash_destroy(&trid_to_trx); - pthread_mutex_destroy(&LOCK_trx_list); - my_atomic_rwlock_destroy(&LOCK_short_id_to_trx); - my_free((void *)(short_id_to_trx+1), MYF(0)); -} - -static TrID new_trid() -{ - DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); - safe_mutex_assert_owner(&LOCK_trx_list); - return ++global_trid_generator; -} - -static void set_short_id(TRX *trx) -{ - int i= (global_trid_generator + (intptr)trx) * 312089 % SHORT_ID_MAX; - my_atomic_rwlock_wrlock(&LOCK_short_id_to_trx); - for ( ; ; i= i % SHORT_ID_MAX + 1) /* the range is [1..SHORT_ID_MAX] */ - { - void *tmp=NULL; - if (short_id_to_trx[i] == NULL && - my_atomic_casptr((void **)&short_id_to_trx[i], &tmp, trx)) - break; - } - my_atomic_rwlock_wrunlock(&LOCK_short_id_to_trx); - trx->short_id= i; -} - -TRX *trxman_new_trx() -{ - TRX *trx; - - my_atomic_add32(&trxman_active_transactions, 1); - - /* - see trxman_end_trx to see why we need a mutex here - - and as we have a mutex, we can as well do everything - under it - allocating a TRX, incrementing trxman_active_transactions, - setting trx->min_read_from. - - Note that all the above is fast. generating short_id may be slow, - as it involves scanning a big array - so it's still done - outside of the mutex. - */ - - pthread_mutex_lock(&LOCK_trx_list); - trx=pool; - while (trx && !my_atomic_casptr((void **)&pool, (void **)&trx, trx->next)) - /* no-op */; - - if (!trx) - { - trx=(TRX *)my_malloc(sizeof(TRX), MYF(MY_WME)); - trxman_allocated_transactions++; - } - if (!trx) - return 0; - - trx->min_read_from= active_list_min.next->trid; - - trx->trid= new_trid(); - trx->short_id= 0; - - trx->next= &active_list_max; - trx->prev= active_list_max.prev; - active_list_max.prev= trx->prev->next= trx; - pthread_mutex_unlock(&LOCK_trx_list); - - trx->pins=lf_hash_get_pins(&trid_to_trx); - - if (!trx->min_read_from) - trx->min_read_from= trx->trid; - - trx->commit_trid=0; - - set_short_id(trx); /* this must be the last! */ - - - return trx; -} - -/* - remove a trx from the active list, - move to committed list, - set commit_trid - - TODO - integrate with lock manager, log manager. That means: - a common "commit" mutex - forcing the log and setting commit_trid - must be done atomically (QQ how the heck it could be done with - group commit ???) - - trid_to_trx, active_list_*, and committed_list_* can be - updated asyncronously. -*/ -void trxman_end_trx(TRX *trx, my_bool commit) -{ - int res; - TRX *free_me= 0; - LF_PINS *pins= trx->pins; - - pthread_mutex_lock(&LOCK_trx_list); - trx->next->prev= trx->prev; - trx->prev->next= trx->next; - - if (trx->prev == &active_list_min) - { - TRX *t; - for (t= committed_list_min.next; - t->commit_trid < active_list_min.next->min_read_from; - t= t->next) /* no-op */; - - if (t != committed_list_min.next) - { - free_me= committed_list_min.next; - committed_list_min.next= t; - t->prev->next=0; - t->prev= &committed_list_min; - } - } - - my_atomic_rwlock_wrlock(&LOCK_short_id_to_trx); - my_atomic_storeptr((void **)&short_id_to_trx[trx->short_id], 0); - my_atomic_rwlock_wrunlock(&LOCK_short_id_to_trx); - - if (commit && active_list_min.next != &active_list_max) - { - trx->commit_trid= global_trid_generator; - - trx->next= &committed_list_max; - trx->prev= committed_list_max.prev; - committed_list_max.prev= trx->prev->next= trx; - - res= lf_hash_insert(&trid_to_trx, pins, &trx); - DBUG_ASSERT(res == 0); - } - else - { - trx->next=free_me; - free_me=trx; - } - pthread_mutex_unlock(&LOCK_trx_list); - - my_atomic_add32(&trxman_active_transactions, -1); - - while (free_me) - { - int res; - TRX *t= free_me; - free_me= free_me->next; - - res= lf_hash_delete(&trid_to_trx, pins, &t->trid, sizeof(TrID)); - - trxman_free_trx(t); - } - - lf_hash_put_pins(pins); -} - -/* free a trx (add to the pool, that is */ -void trxman_free_trx(TRX *trx) -{ - TRX *tmp=pool; - - do - { - trx->next=tmp; - } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trx)); -} - -my_bool trx_can_read_from(TRX *trx, TrID trid) -{ - TRX *found; - my_bool can; - - if (trid < trx->min_read_from) - return TRUE; - if (trid > trx->trid) - return FALSE; - - found= lf_hash_search(&trid_to_trx, trx->pins, &trid, sizeof(trid)); - if (!found) - return FALSE; /* not in the hash = cannot read */ - - can= found->commit_trid < trx->trid; - lf_unpin(trx->pins, 2); - return can; -} - diff --git a/storage/maria/trxman.h b/storage/maria/trxman.h deleted file mode 100644 index 5ac989d03a4..00000000000 --- a/storage/maria/trxman.h +++ /dev/null @@ -1,28 +0,0 @@ - -typedef uint64 TrID; /* our TrID is 6 bytes */ - -typedef struct st_transaction -{ - TrID trid, min_read_from, commit_trid; - struct st_transaction *next, *prev; - /* Note! if short_id is 0, trx is NOT initialized */ - uint16 short_id; - LF_PINS *pins; -} TRX; - -#define SHORT_ID_MAX 65535 - -extern uint trxman_active_transactions, trxman_allocated_transactions; - -extern TRX **short_id_to_trx; -extern my_atomic_rwlock_t LOCK_short_id_to_trx; - -int trxman_init(); -int trxman_end(); -TRX *trxman_new_trx(); -void trxman_end_trx(TRX *trx, my_bool commit); -#define trxman_commit_trx(T) trxman_end_trx(T, TRUE) -#define trxman_abort_trx(T) trxman_end_trx(T, FALSE) -void trxman_free_trx(TRX *trx); -my_bool trx_can_read_from(TRX *trx, TrID trid); - diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index 8a5ca3d669f..e29bc7f86cb 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -25,5 +25,5 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -noinst_PROGRAMS = ma_control_file-t +noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t CLEANFILES = maria_control diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c new file mode 100644 index 00000000000..8b62ccfe094 --- /dev/null +++ b/storage/maria/unittest/lockman-t.c @@ -0,0 +1,246 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +//#define EXTRA_VERBOSE + +#include + +#include +#include +#include +#include +#include "../lockman.h" + +#define Nlos 10 +LOCK_OWNER loarray[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKMAN lockman; + +#ifndef EXTRA_VERBOSE +#define print_lockhash(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman) +#define test_lock(O, R, L, S, RES) \ + ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ + "lo" #O "> " S " lock resource " #R " with " #L "-lock"); \ + print_lockhash(&lockman) +#define lock_ok_a(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK) +#define lock_ok_i(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O,R,L) test_lock(O,R,L,"cannot ",DIDNT_GET_THE_LOCK); \ + unlock_all(O) + +void test_lockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2,1,X); /* this removes all locks of lo2 */ + lock_ok_a(1,2,S); + lock_ok_a(1,2,IS); + lock_ok_a(1,2,LS); + lock_ok_i(1,3,IX); + lock_ok_a(2,3,LS); + lock_ok_i(1,3,IX); + lock_ok_l(2,3,IS); + lockman_release_locks(&lockman, loid2lo(1)); + lockman_release_locks(&lockman, loid2lo(2)); + +} + +pthread_attr_t rt_attr; +pthread_mutex_t rt_mutex; +pthread_cond_t rt_cond; +int rt_num_threads; +int litmus; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t t; + ulonglong now= my_getsystime(); + + litmus= 0; + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + for (rt_num_threads= n ; n ; n--) + pthread_create(&t, &rt_attr, handler, &m); + pthread_mutex_lock(&rt_mutex); + while (rt_num_threads) + pthread_cond_wait(&rt_cond, &rt_mutex); + pthread_mutex_unlock(&rt_mutex); + now= my_getsystime()-now; + ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); +} + +int thread_number= 0, timeouts=0; +#define Nrows 1000 +#define Ntables 10 +#define TABLE_LOCK_RATIO 10 +enum lock_type lock_array[6]={S,X,LS,LX,IS,IX}; +char *lock2str[6]={"S","X","LS","LX","IS","IX"}; +char *res2str[6]={ + "DIDN'T GET THE LOCK", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if ((x/Nrows/4) % TABLE_LOCK_RATIO == 0) + { /* table lock */ + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]); + DIAG(("loid=%2d, table %d lock %s, res=%s", loid, table, lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid=%2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]); + DIAG(("loid=%2d, row %d lock %s, res=%s", loid, row, lock2str[locklevel+4], res2str[res])); + switch (res) + { + case DIDNT_GET_THE_LOCK: + lockman_release_locks(&lockman, lo); + DIAG(("loid=%2d, release all locks", loid)); + timeout++; + continue; + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid=%2d, ROW %d lock %s, res=%s", loid, row, lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid=%2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + DBUG_ASSERT(0); + } + } + } + + lockman_release_locks(&lockman, lo); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + { + pthread_cond_signal(&rt_cond); + diag("number of timeouts: %d", timeouts); + } + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + + plan(14); + + if (my_atomic_initialize()) + return exit_status(); + + pthread_attr_init(&rt_attr); + pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); + pthread_mutex_init(&rt_mutex, 0); + pthread_cond_init(&rt_cond, 0); + + lockman_init(&lockman, &loid2lo, 50); + + for (i= 0; i < Nlos; i++) + { + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + } + + test_lockman_simple(); + +#define CYCLES 100 +#define THREADS Nlos /* don't change this line */ + + run_test("lockman", test_lockman, THREADS,CYCLES); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + lockman_destroy(&lockman); + + pthread_mutex_destroy(&rt_mutex); + pthread_cond_destroy(&rt_cond); + pthread_attr_destroy(&rt_attr); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c new file mode 100644 index 00000000000..6d4b48c6d3d --- /dev/null +++ b/storage/maria/unittest/trnman-t.c @@ -0,0 +1,177 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + +#include +#include +#include +#include +#include "../trnman.h" + +pthread_attr_t rt_attr; +pthread_mutex_t rt_mutex; +pthread_cond_t rt_cond; +int rt_num_threads; + +int litmus; + +/* + create and end (commit or rollback) transactions randomly +*/ +#define MAX_ITER 100 +pthread_handler_t test_trnman(void *arg) +{ + int m= (*(int *)arg); + uint x, y, i, j, n; + TRN *trn[MAX_ITER]; + pthread_mutex_t mutexes[MAX_ITER]; + pthread_cond_t conds[MAX_ITER]; + + for (i=0; i < MAX_ITER; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + for (x= ((int)(intptr)(&m)); m > 0; ) + { + y= x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + m-= n= x % MAX_ITER; + for (i= 0; i < n; i++) + trn[i]= trnman_new_trn(&mutexes[i], &conds[i]); + for (i= 0; i < n; i++) + { + y= (y*19 + 7) % 31; + trnman_end_trn(trn[i], y & 1); + } + } + + for (i=0; i < MAX_ITER; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + if (!rt_num_threads) + pthread_cond_signal(&rt_cond); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} +#undef MAX_ITER + +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t t; + ulonglong now= my_getsystime(); + + litmus= 0; + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + for (rt_num_threads= n ; n ; n--) + pthread_create(&t, &rt_attr, handler, &m); + pthread_mutex_lock(&rt_mutex); + while (rt_num_threads) + pthread_cond_wait(&rt_cond, &rt_mutex); + pthread_mutex_unlock(&rt_mutex); + now= my_getsystime()-now; + ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); +} + +#define ok_read_from(T1, T2, RES) \ + i=trnman_can_read_from(trn[T1], trid[T2]); \ + ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot") +#define start_transaction(T) \ + trn[T]= trnman_new_trn(&mutexes[T], &conds[T]); \ + trid[T]= trn[T]->trid +#define commit(T) trnman_commit_trn(trn[T]) +#define abort(T) trnman_abort_trn(trn[T]) + +#define Ntrns 4 +void test_trnman_read_from() +{ + TRN *trn[Ntrns]; + TrID trid[Ntrns]; + pthread_mutex_t mutexes[Ntrns]; + pthread_cond_t conds[Ntrns]; + int i; + + for (i=0; i < Ntrns; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + start_transaction(0); /* start trn1 */ + start_transaction(1); /* start trn2 */ + ok_read_from(1,0,0); + commit(0); /* commit trn1 */ + start_transaction(2); /* start trn4 */ + abort(2); /* abort trn4 */ + start_transaction(3); /* start trn5 */ + ok_read_from(3,0,1); + ok_read_from(3,1,0); + ok_read_from(3,2,0); + commit(1); /* commit trn2 */ + ok_read_from(3,1,0); + commit(3); /* commit trn5 */ + + for (i=0; i < Ntrns; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } +} + +int main() +{ + my_init(); + + plan(6); + + if (my_atomic_initialize()) + return exit_status(); + + pthread_attr_init(&rt_attr); + pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); + pthread_mutex_init(&rt_mutex, 0); + pthread_cond_init(&rt_cond, 0); + +#define CYCLES 10000 +#define THREADS 10 + + trnman_init(); + + test_trnman_read_from(); + run_test("trnman", test_trnman, THREADS,CYCLES); + + diag("mallocs: %d", trnman_allocated_transactions); + { + ulonglong now= my_getsystime(); + trnman_destroy(); + now= my_getsystime()-now; + diag("trnman_destroy: %g", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + pthread_cond_destroy(&rt_cond); + pthread_attr_destroy(&rt_attr); + my_end(0); + return exit_status(); +} + diff --git a/unittest/maria/Makefile.am b/unittest/maria/Makefile.am deleted file mode 100644 index 667d1e09a07..00000000000 --- a/unittest/maria/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ - -AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include -AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap - -LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ - $(top_builddir)/storage/maria/libmaria.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a - -noinst_PROGRAMS = trxman-t - diff --git a/unittest/maria/trxman-t.c b/unittest/maria/trxman-t.c deleted file mode 100644 index 7536e5534bf..00000000000 --- a/unittest/maria/trxman-t.c +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (C) 2006 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include - -#include -#include -#include -#include -#include "../../storage/maria/trxman.h" - -pthread_attr_t rt_attr; -pthread_mutex_t rt_mutex; -pthread_cond_t rt_cond; -int rt_num_threads; - -int litmus; - -/* template for a test: the goal is to have litmus==0 if the test passed - -#define ITER nnn -pthread_handler_t test_XXXXXXXX(void *arg) -{ - int m=(*(int *)arg)/ITER, x; - - for (x=((int)(intptr)(&m)); m ; m--) - { - // do something with litmus - } - // do something more with litmus - - pthread_mutex_lock(&rt_mutex); - rt_num_threads--; - if (!rt_num_threads) - { - diag("whatever diagnostics we want", blabla, foobar); - pthread_cond_signal(&rt_cond); - } - pthread_mutex_unlock(&rt_mutex); - return 0; -} -#undef ITER - -*/ - -/* - create and end (commit or rollback) transactions randomly -*/ -#define MAX_ITER 100 -pthread_handler_t test_trxman(void *arg) -{ - int m=(*(int *)arg); - uint x, y, i, j, n; - TRX *trx[MAX_ITER]; - - for (x=((int)(intptr)(&m)); m > 0; ) - { - y= x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ - m-= n= x % MAX_ITER; - for (i=0; i < n; i++) - trx[i]=trxman_new_trx(); - for (i=0; i < n; i++) - { - y=(y*19 + 7) % 31; - trxman_end_trx(trx[i], y & 1); - } - } - - pthread_mutex_lock(&rt_mutex); - rt_num_threads--; - if (!rt_num_threads) - pthread_cond_signal(&rt_cond); - pthread_mutex_unlock(&rt_mutex); - return 0; -} -#undef MAX_ITER - -void run_test(const char *test, pthread_handler handler, int n, int m) -{ - pthread_t t; - ulonglong now=my_getsystime(); - - litmus= 0; - - diag("Testing %s with %d threads, %d iterations... ", test, n, m); - for (rt_num_threads=n ; n ; n--) - pthread_create(&t, &rt_attr, handler, &m); - pthread_mutex_lock(&rt_mutex); - while (rt_num_threads) - pthread_cond_wait(&rt_cond, &rt_mutex); - pthread_mutex_unlock(&rt_mutex); - now=my_getsystime()-now; - ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); -} - -int main() -{ - plan(1); - - if (my_atomic_initialize()) - return exit_status(); - - my_init(); - - pthread_attr_init(&rt_attr); - pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); - pthread_mutex_init(&rt_mutex, 0); - pthread_cond_init(&rt_cond, 0); - -#define CYCLES 10000 -#define THREADS 10 - - trxman_init(); - run_test("trxman", test_trxman, THREADS,CYCLES); - trxman_destroy(); - diag("mallocs: %d\n", trxman_allocated_transactions); - - pthread_mutex_destroy(&rt_mutex); - pthread_cond_destroy(&rt_cond); - pthread_attr_destroy(&rt_attr); - my_end(0); - return exit_status(); -} - From 0cb46440105a7b31b2959f4c8fa22be5e333e87a Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 15 Oct 2006 22:26:42 +0300 Subject: [PATCH 38/95] This is not supposed to be pushed, it is just to check of source of mac os problems storage/maria/_MakeLists.txt: Rename: storage/maria/CMakeLists.txt -> storage/maria/_MakeLists.txt --- storage/maria/{CMakeLists.txt => _MakeLists.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename storage/maria/{CMakeLists.txt => _MakeLists.txt} (100%) diff --git a/storage/maria/CMakeLists.txt b/storage/maria/_MakeLists.txt similarity index 100% rename from storage/maria/CMakeLists.txt rename to storage/maria/_MakeLists.txt From 12a55aeabc353fdc1c3829ddd8baacb142160c80 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 18 Oct 2006 17:24:07 +0200 Subject: [PATCH 39/95] lock manager passed unit tests storage/maria/trnman.c: comments include/my_dbug.h: make DBUG_ASSERT always a statement storage/maria/lockman.h: comments include/lf.h: lf_pinbox - don't use a fixed-size purgatory. mysys/lf_alloc-pin.c: lf_pinbox - don't use a fixed-size purgatory. mysys/lf_hash.c: lf_pinbox - don't use a fixed-size purgatory. storage/maria/lockman.c: removed IGNORE_ME/UPGDARED matching - it was wrong in the first place. updated for "lf_pinbox - don't use a fixed-size purgatory" storage/maria/unittest/lockman-t.c: IGNORE_ME/UPGRADED pair counting bugtest. more tests unittest/mysys/my_atomic-t.c: lf_pinbox - don't use a fixed-size purgatory. --- include/lf.h | 15 ++- include/my_dbug.h | 2 +- mysys/lf_alloc-pin.c | 172 ++++++++++++------------- mysys/lf_hash.c | 4 +- storage/maria/lockman.c | 199 +++++++++++++++++++++-------- storage/maria/lockman.h | 9 +- storage/maria/trnman.c | 22 +++- storage/maria/unittest/lockman-t.c | 85 +++++++++--- unittest/mysys/my_atomic-t.c | 4 +- 9 files changed, 330 insertions(+), 182 deletions(-) diff --git a/include/lf.h b/include/lf.h index 4c6765b2d40..45c5f109e1c 100644 --- a/include/lf.h +++ b/include/lf.h @@ -96,20 +96,21 @@ typedef void lf_pinbox_free_func(void *, void *); typedef struct { LF_DYNARRAY pinstack; lf_pinbox_free_func *free_func; - void * free_func_arg; + void *free_func_arg; + uint free_ptr_offset; uint32 volatile pinstack_top_ver; /* this is a versioned pointer */ uint32 volatile pins_in_stack; /* number of elements in array */ } LF_PINBOX; -/* we want sizeof(LF_PINS) to be close to 128 to avoid false sharing */ +/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ typedef struct { void * volatile pin[LF_PINBOX_PINS]; - void * purgatory[LF_PURGATORY_SIZE]; LF_PINBOX *pinbox; + void *purgatory; uint32 purgatory_count; uint32 volatile link; char pad[128-sizeof(uint32)*2 - -sizeof(void *)*(LF_PINBOX_PINS+LF_PURGATORY_SIZE+1)]; + -sizeof(void *)*(LF_PINBOX_PINS+2)]; } LF_PINS; #define lf_rwlock_by_pins(PINS) \ @@ -147,8 +148,8 @@ typedef struct { #define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) #define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN]==0) -void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, - void * free_func_arg); +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void * free_func_arg); void lf_pinbox_destroy(LF_PINBOX *pinbox); lock_wrap(lf_pinbox_get_pins, LF_PINS *, @@ -181,7 +182,7 @@ typedef struct st_lf_allocator { uint32 volatile mallocs; } LF_ALLOCATOR; -void lf_alloc_init(LF_ALLOCATOR *allocator, uint size); +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset); void lf_alloc_destroy(LF_ALLOCATOR *allocator); uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); #define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) diff --git a/include/my_dbug.h b/include/my_dbug.h index a397861c1af..e3fdf9d6461 100644 --- a/include/my_dbug.h +++ b/include/my_dbug.h @@ -100,7 +100,7 @@ extern FILE *_db_fp_(void); #define DBUG_LONGJMP(a1) longjmp(a1) #define DBUG_DUMP(keyword,a1,a2) #define DBUG_END() -#define DBUG_ASSERT(A) +#define DBUG_ASSERT(A) do { } while(0) #define DBUG_LOCK_FILE #define DBUG_FILE (stderr) #define DBUG_UNLOCK_FILE diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index cf1612b73d1..842c9690463 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -15,21 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - concurrent allocator based on pinning addresses - - strictly speaking it's not lock-free, as it can be blocked - if a thread's purgatory is full and all addresses from there - are pinned. - - But until the above happens, it's wait-free. - - It can be made strictly wait-free by increasing purgatory size. - If it's larger than pins_in_stack*LF_PINBOX_PINS, then apocalyptical - condition above will never happen. But than the memory requirements - will be O(pins_in_stack^2). - - Note, that for large purgatory sizes it makes sense to remove - purgatory array, and link objects in a list using embedded pointer. + wait-free concurrent allocator based on pinning addresses TODO test with more than 256 threads TODO test w/o alloca @@ -43,15 +29,17 @@ static void _lf_pinbox_real_free(LF_PINS *pins); -void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, - void *free_func_arg) +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func,void *free_func_arg) { DBUG_ASSERT(sizeof(LF_PINS) == 128); + DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); - pinbox->pinstack_top_ver=0; - pinbox->pins_in_stack=0; - pinbox->free_func=free_func; - pinbox->free_func_arg=free_func_arg; + pinbox->pinstack_top_ver= 0; + pinbox->pins_in_stack= 0; + pinbox->free_ptr_offset= free_ptr_offset; + pinbox->free_func= free_func; + pinbox->free_func_arg= free_func_arg; } void lf_pinbox_destroy(LF_PINBOX *pinbox) @@ -64,58 +52,64 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) uint32 pins, next, top_ver; LF_PINS *el; - top_ver=pinbox->pinstack_top_ver; + top_ver= pinbox->pinstack_top_ver; do { - if (!(pins=top_ver % LF_PINBOX_MAX_PINS)) + if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) { - pins=my_atomic_add32(&pinbox->pins_in_stack, 1)+1; - el=(LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + pins= my_atomic_add32(&pinbox->pins_in_stack, 1)+1; + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); break; } - el=(LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); - next=el->link; + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + next= el->link; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins+next+LF_PINBOX_MAX_PINS)); - el->link=pins; - el->purgatory_count=0; - el->pinbox=pinbox; + el->link= pins; + el->purgatory_count= 0; + el->pinbox= pinbox; return el; } void _lf_pinbox_put_pins(LF_PINS *pins) { - LF_PINBOX *pinbox=pins->pinbox; + LF_PINBOX *pinbox= pins->pinbox; uint32 top_ver, nr; - nr=pins->link; + nr= pins->link; #ifdef MY_LF_EXTRA_DEBUG { int i; - for (i=0; i < LF_PINBOX_PINS; i++) + for (i= 0; i < LF_PINBOX_PINS; i++) assert(pins->pin[i] == 0); } #endif + /* + Note - this will deadlock if other threads will wait for + the caller to do something after _lf_pinbox_put_pins(), + and they would have pinned addresses that the caller wants to free. + Thus: only free pins when all work is done and nobody can wait for you!!! + */ while (pins->purgatory_count) { _lf_pinbox_real_free(pins); - if (pins->purgatory_count && my_getncpus() == 1) + if (pins->purgatory_count) { my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); pthread_yield(); my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); } } - top_ver=pinbox->pinstack_top_ver; + top_ver= pinbox->pinstack_top_ver; if (nr == pinbox->pins_in_stack) { - int32 tmp=nr; + int32 tmp= nr; if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) goto ret; } do { - pins->link=top_ver % LF_PINBOX_MAX_PINS; + pins->link= top_ver % LF_PINBOX_MAX_PINS; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); ret: @@ -127,19 +121,20 @@ static int ptr_cmp(void **a, void **b) return *a < *b ? -1 : *a == *b ? 0 : 1; } +#define add_to_purgatory(PINS, ADDR) \ + do \ + { \ + *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ + (PINS)->purgatory; \ + (PINS)->purgatory= (ADDR); \ + (PINS)->purgatory_count++; \ + } while (0) + void _lf_pinbox_free(LF_PINS *pins, void *addr) { - while (pins->purgatory_count == LF_PURGATORY_SIZE) - { + if (pins->purgatory_count % LF_PURGATORY_SIZE) _lf_pinbox_real_free(pins); - if (pins->purgatory_count == LF_PURGATORY_SIZE && my_getncpus() == 1) - { - my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); - pthread_yield(); - my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); - } - } - pins->purgatory[pins->purgatory_count++]=addr; + add_to_purgatory(pins, addr); } struct st_harvester { @@ -178,11 +173,11 @@ static int match_pins(LF_PINS *el, void *addr) static void _lf_pinbox_real_free(LF_PINS *pins) { int npins; - void **addr=0; - void **start, **cur, **end=pins->purgatory+pins->purgatory_count; - LF_PINBOX *pinbox=pins->pinbox; + void *list; + void **addr; + LF_PINBOX *pinbox= pins->pinbox; - npins=pinbox->pins_in_stack+1; + npins= pinbox->pins_in_stack+1; #ifdef HAVE_ALLOCA /* create a sorted list of pinned addresses, to speed up searches */ @@ -190,64 +185,64 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { struct st_harvester hv; addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); - hv.granary=addr; - hv.npins=npins; + hv.granary= addr; + hv.npins= npins; _lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)harvest_pins, &hv); - npins=hv.granary-addr; + npins= hv.granary-addr; if (npins) qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); } + else #endif + addr= 0; - start= cur= pins->purgatory; - end= start+pins->purgatory_count; - for (; cur < end; cur++) + list= pins->purgatory; + pins->purgatory= 0; + pins->purgatory_count= 0; + while (list) { + void *cur= list; + list= *(void **)((char *)cur+pinbox->free_ptr_offset); if (npins) { if (addr) { void **a,**b,**c; - for (a=addr, b=addr+npins-1, c=a+(b-a)/2; b-a>1; c=a+(b-a)/2) - if (*cur == *c) - a=b=c; - else if (*cur > *c) - a=c; + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) + if (cur == *c) + a= b= c; + else if (cur > *c) + a= c; else - b=c; - if (*cur == *a || *cur == *b) + b= c; + if (cur == *a || cur == *b) goto found; } else { if (_lf_dynarray_iterate(&pinbox->pinstack, - (lf_dynarray_func)match_pins, *cur)) + (lf_dynarray_func)match_pins, cur)) goto found; } } /* not pinned - freeing */ - pinbox->free_func(*cur, pinbox->free_func_arg); + pinbox->free_func(cur, pinbox->free_func_arg); continue; found: /* pinned - keeping */ - *start++=*cur; + add_to_purgatory(pins, cur); } - pins->purgatory_count=start-pins->purgatory; -#ifdef MY_LF_EXTRA_DEBUG - while (start < pins->purgatory + LF_PURGATORY_SIZE) - *start++=0; -#endif } static void alloc_free(void *node, LF_ALLOCATOR *allocator) { void *tmp; - tmp=allocator->top; + tmp= allocator->top; do { - (*(void **)node)=tmp; + (*(void **)node)= tmp; } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && LF_BACKOFF); } @@ -255,18 +250,18 @@ static void alloc_free(void *node, LF_ALLOCATOR *allocator) LF_REQUIRE_PINS(1); void *_lf_alloc_new(LF_PINS *pins) { - LF_ALLOCATOR *allocator=(LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); void *node; for (;;) { do { - node=allocator->top; + node= allocator->top; _lf_pin(pins, 0, node); - } while (node !=allocator->top && LF_BACKOFF); + } while (node != allocator->top && LF_BACKOFF); if (!node) { - if (!(node=my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) goto ret; #ifdef MY_LF_EXTRA_DEBUG my_atomic_add32(&allocator->mallocs, 1); @@ -282,27 +277,27 @@ ret: return node; } -void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) { - lf_pinbox_init(&allocator->pinbox, + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, (lf_pinbox_free_func *)alloc_free, allocator); - allocator->top=0; - allocator->mallocs=0; - allocator->element_size=size; + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; DBUG_ASSERT(size >= (int)sizeof(void *)); } void lf_alloc_destroy(LF_ALLOCATOR *allocator) { - void *el=allocator->top; + void *el= allocator->top; while (el) { - void *tmp=*(void **)el; + void *tmp= *(void **)el; my_free(el, MYF(0)); - el=tmp; + el= tmp; } lf_pinbox_destroy(&allocator->pinbox); - allocator->top=0; + allocator->top= 0; } /* @@ -313,7 +308,8 @@ uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { uint i; void *node; - for (node=allocator->top, i=0; node; node=*(void **)node, i++) /* no op */; + for (node= allocator->top, i= 0; node; node= *(void **)node, i++) + /* no op */; return i; } diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index a0425e89556..45b45f7531e 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -20,6 +20,7 @@ TODO try to get rid of dummy nodes ? for non-unique hash, count only _distinct_ values + (but how to do it in lf_hash_delete ?) */ #include #include @@ -222,7 +223,8 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) { - lf_alloc_init(&hash->alloc,sizeof(LF_SLIST)+element_size); + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); hash->size=1; hash->count=0; diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index e8ddbd1a25a..1712f6f2221 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -1,4 +1,4 @@ -// TODO lock escalation, instant duration locks +// TODO instant duration locks // automatically place S instead of LS if possible /* TODO optimization: table locks - they have completely @@ -21,6 +21,94 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + Generic Lock Manager + + Lock manager handles locks on "resources", a resource must be uniquely + identified by a 64-bit number. Lock manager itself does not imply + anything about the nature of a resource - it can be a row, a table, a + database, or just anything. + + Locks belong to "lock owners". A Lock owner is uniquely identified by a + 16-bit number. A function loid2lo must be provided by the application + that takes such a number as an argument and returns a LOCK_OWNER + structure. + + Lock levels are completely defined by three tables. Lock compatibility + matrix specifies which locks can be held at the same time on a resource. + Lock combining matrix specifies what lock level has the same behaviour as + a pair of two locks of given levels. getlock_result matrix simplifies + intention locking and lock escalation for an application, basically it + defines which locks are intention locks and which locks are "loose" + locks. It is only used to provide better diagnostics for the + application, lock manager itself does not differentiate between normal, + intention, and loose locks. + + Internally lock manager is based on a lock-free hash, see lf_hash.c for + details. All locks are stored in a hash, with a resource id as a search + key, so all locks for the same resource will be considered collisions and + will be put in a one (lock-free) linked list. The main lock-handling + logic is in the inner loop that searches for a lock in such a linked + list - lockfind(). + + This works as follows. Locks generally are added to the end of the list + (with one exception, see below). When scanning the list it is always + possible to determine what locks are granted (active) and what locks are + waiting - first lock is obviously active, the second is active if it's + compatible with the first, and so on, a lock is active if it's compatible + with all previous locks and all locks before it are also active. + To calculate the "compatible with all previous locks" all locks are + accumulated in prev_lock variable using lock_combining_matrix. + + Lock upgrades: when a thread that has a lock on a given resource, + requests a new lock on the same resource and the old lock is not enough + to satisfy new lock requirements (which is defined by + lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock is + placed in the list. Depending on other locks it is immediately active or + it will wait for other locks. Here's an exception to "locks are added + to the end" rule - upgraded locks are added after the last active lock + but before all waiting locks. Old lock (the one we upgraded from) is + not removed from the list, indeed we may need to return to it later if + the new lock was in a savepoint that gets rolled back. So old lock is + marked as "ignored" (IGNORE_ME flag). New lock gets an UPGRADED flag. + + Loose locks add an important exception to the above. Loose locks do not + always commute with other locks. In the list IX-LS both locks are active, + while in the LS-IX list only the first lock is active. This creates a + problem in lock upgrades. If the list was IX-LS and the owner of the + first lock wants to place LS lock (which can be immediately granted), the + IX lock is upgraded to LSIX and the list becomes IX-LS-LSIX, which, + according to the lock compatibility matrix means that the last lock is + waiting - of course it all happened because IX and LS were swapped and + they don't commute. To work around this there's ACTIVE flag which is set + in every lock that never waited (was placed active), and this flag + overrides "compatible with all previous locks" rule. + + When a lock is placed to the end of the list it's either compatible with + all locks and all locks are active - new lock becomes active at once, or + it conflicts with some of the locks, in this case in the 'blocker' + variable a conflicting lock is returned and the calling thread waits on a + pthread condition in the LOCK_OWNER structure of the owner of the + conflicting lock. Or a new lock is compatible with all locks, but some + existing locks are not compatible with previous locks (example: request IS, + when the list is S-IX) - that is not all locks are active. In this case a + first waiting lock is returned in the 'blocker' variable, + lockman_getlock() notices that a "blocker" does not conflict with the + requested lock, and "dereferences" it, to find the lock that it's waiting + on. The calling thread than begins to wait on the same lock. + + To better support table-row relations where one needs to lock the table + with an intention lock before locking the row, extended diagnostics is + provided. When an intention lock (presumably on a table) is granted, + lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row, + perhaps the thread already has a normal lock on this table), + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual), + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check + whether it's possible to lock the row, but no need to lock it - perhaps + the thread has a loose lock on this table). This is defined by + getlock_result[] table. +*/ + #include #include #include @@ -36,11 +124,6 @@ ') Though you can take LS lock while somebody has S lock, it makes no sense - it's simpler to take S lock too. - ") Strictly speaking you can take LX lock while somebody has S lock. - But in this case you lock no rows, because all rows are locked by this - somebody. So we prefer to define that LX cannot be taken when S - exists. Same about LX and X. - 1 - compatible 0 - incompatible -1 - "impossible", so that we can assert the impossibility. @@ -107,8 +190,8 @@ static enum lock_type lock_combining_matrix[10][10]= Defines below help to preserve the table structure. I/L/A values are self explanatory x means the combination is possible (assert should not crash) - but cannot happen in row locks, only in table locks (S,X), or - lock escalations (LS,LX) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) */ #define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE #define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE @@ -138,8 +221,7 @@ typedef struct lockman_lock { uint64 resource; struct lockman_lock *lonext; intptr volatile link; - uint32 hashnr; -//#warning TODO - remove hashnr from LOCK + uint32 hashnr; // TODO - remove hashnr from LOCK uint16 loid; uchar lock; /* sizeof(uchar) <= sizeof(enum) */ uchar flags; @@ -147,6 +229,7 @@ typedef struct lockman_lock { #define IGNORE_ME 1 #define UPGRADED 2 +#define ACTIVE 4 typedef struct { intptr volatile *prev; @@ -171,7 +254,7 @@ static int lockfind(LOCK * volatile *head, LOCK *node, my_bool cur_active, compatible, upgrading, prev_active; enum lock_type lock, prev_lock, cur_lock; uint16 loid, cur_loid; - int upgraded_pairs, cur_flags, flags; + int cur_flags, flags; hashnr= node->hashnr; resource= node->resource; @@ -187,7 +270,6 @@ retry: upgrading= FALSE; cursor->blocker= cursor->upgrade_from= 0; _lf_unpin(pins, 3); - upgraded_pairs= 0; do { cursor->curr= PTR(*cursor->prev); _lf_pin(pins,1,cursor->curr); @@ -217,28 +299,24 @@ retry: (cur_hashnr == hashnr && cur_resource >= resource)) { if (cur_hashnr > hashnr || cur_resource > resource) - { - if (upgraded_pairs != 0) - goto retry; break; - } /* ok, we have a lock for this resource */ DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0); DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0); - if (cur_flags & UPGRADED) - upgraded_pairs++; if ((cur_flags & IGNORE_ME) && ! (flags & IGNORE_ME)) { DBUG_ASSERT(cur_active); - upgraded_pairs--; if (cur_loid == loid) cursor->upgrade_from= cursor->curr; } else { prev_active= cur_active; - cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; - if (upgrading && !cur_active && upgraded_pairs == 0) + if (cur_flags & ACTIVE) + DBUG_ASSERT(prev_active == TRUE); + else + cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; + if (upgrading && !cur_active) break; if (prev_active && !cur_active) { @@ -253,8 +331,14 @@ retry: if (lock_combining_matrix[cur_lock][lock] == cur_lock) { /* new lock is compatible */ - return cur_active ? ALREADY_HAVE_THE_LOCK - : ALREADY_HAVE_THE_REQUEST; + if (cur_active) + { + cursor->blocker= cursor->curr; /* loose-locks! */ + _lf_unpin(pins, 3); /* loose-locks! */ + return ALREADY_HAVE_THE_LOCK; + } + else + return ALREADY_HAVE_THE_REQUEST; } /* not compatible, upgrading */ upgrading= TRUE; @@ -268,9 +352,9 @@ retry: cursor->blocker= cursor->curr; _lf_pin(pins, 3, cursor->curr); } - prev_lock= lock_combining_matrix[prev_lock][cur_lock]; - DBUG_ASSERT(prev_lock != N); } + prev_lock= lock_combining_matrix[prev_lock][cur_lock]; + DBUG_ASSERT(prev_lock != N); } } cursor->prev= &(cursor->curr->link); @@ -335,6 +419,10 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, node->flags|= UPGRADED; node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock]; } + if (!(res & NEED_TO_WAIT)) + node->flags|= ACTIVE; + else + node->flags&= ~ACTIVE; /* if we're retrying on REPEAT_ONCE_MORE */ node->link= (intptr)cursor.curr; DBUG_ASSERT(node->link != (intptr)node); DBUG_ASSERT(cursor.prev != &node->link); @@ -349,13 +437,13 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, _lf_unpin(pins, 1); _lf_unpin(pins, 2); /* - note that cursor.curr is NOT pinned on return. - this is ok as it's either a dummy node for initialize_bucket + note that blocker is not necessarily pinned here (when it's == curr). + this is ok as it's either a dummy node then for initialize_bucket and dummy nodes don't need pinning, or it's a lock of the same transaction for lockman_getlock, and it cannot be removed by another thread */ - *blocker= cursor.blocker ? cursor.blocker : cursor.curr; + *blocker= cursor.blocker; return res; } @@ -419,7 +507,7 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout) { - lf_alloc_init(&lm->alloc,sizeof(LOCK)); + lf_alloc_init(&lm->alloc,sizeof(LOCK), offsetof(LOCK,lonext)); lf_dynarray_init(&lm->array, sizeof(LOCK **)); lm->size= 1; lm->count= 0; @@ -516,13 +604,13 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, res= lockinsert(el, node, pins, &blocker); if (res & ALREADY_HAVE) { + int r; old_lock= blocker->lock; - _lf_assert_unpin(pins, 3); /* unpin should not be needed */ _lf_alloc_free(pins, node); lf_rwunlock_by_pins(pins); - res= getlock_result[old_lock][lock]; - DBUG_ASSERT(res); - return res; + r= getlock_result[old_lock][lock]; + DBUG_ASSERT(r); + return r; } /* a new value was added to the hash */ csize= lm->size; @@ -537,9 +625,8 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, struct timespec timeout; _lf_assert_pin(pins, 3); /* blocker must be pinned here */ - lf_rwunlock_by_pins(pins); - wait_for_lo= lm->loid_to_lo(blocker->loid); + /* now, this is tricky. blocker is not necessarily a LOCK we're waiting for. If it's compatible with what we want, @@ -550,12 +637,9 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, if (lock_compatibility_matrix[blocker->lock][lock]) { blocker= wait_for_lo->all_locks; - lf_pin(pins, 3, blocker); + _lf_pin(pins, 3, blocker); if (blocker != wait_for_lo->all_locks) - { - lf_rwlock_by_pins(pins); continue; - } wait_for_lo= wait_for_lo->waiting_for; } @@ -565,11 +649,11 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, to an unrelated - albeit valid - LOCK_OWNER */ if (!wait_for_lo) - { - /* blocker transaction has ended, short id was released */ - lf_rwlock_by_pins(pins); continue; - } + + lo->waiting_for= wait_for_lo; + lf_rwunlock_by_pins(pins); + /* We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER, @@ -587,9 +671,8 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, lf_rwlock_by_pins(pins); continue; } - /* yuck. waiting */ - lo->waiting_for= wait_for_lo; + /* yuck. waiting */ deadline= my_getsystime() + lm->lock_timeout * 10000; timeout.tv_sec= deadline/10000000; timeout.tv_nsec= (deadline % 10000000) * 100; @@ -607,11 +690,12 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, Instead we're relying on the caller to abort the transaction, and release all locks at once - see lockman_release_locks() */ + _lf_unpin(pins, 3); lf_rwunlock_by_pins(pins); return DIDNT_GET_THE_LOCK; } - lo->waiting_for= 0; } + lo->waiting_for= 0; _lf_assert_unpin(pins, 3); /* unpin should not be needed */ lf_rwunlock_by_pins(pins); return getlock_result[lock][lock]; @@ -626,14 +710,15 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, */ int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) { - LOCK * volatile *el, *node; + LOCK * volatile *el, *node, *next; uint bucket; LF_PINS *pins= lo->pins; pthread_mutex_lock(lo->mutex); lf_rwlock_by_pins(pins); - for (node= lo->all_locks; node; node= node->lonext) + for (node= lo->all_locks; node; node= next) { + next= node->lonext; bucket= calc_hash(node->resource) % lm->size; el= _lf_dynarray_lvalue(&lm->array, bucket); if (*el == NULL) @@ -650,12 +735,12 @@ int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) } #ifdef MY_LF_EXTRA_DEBUG +static char *lock2str[]= +{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; /* NOTE the function below is NOT thread-safe !!! */ -static char *lock2str[]= -{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; void print_lockhash(LOCKMAN *lm) { LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); @@ -664,17 +749,21 @@ void print_lockhash(LOCKMAN *lm) { intptr next= el->link; if (el->hashnr & 1) + { printf("0x%08x { resource %llu, loid %u, lock %s", el->hashnr, el->resource, el->loid, lock2str[el->lock]); + if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); + if (el->flags & UPGRADED) printf(" UPGRADED"); + if (el->flags & ACTIVE) printf(" ACTIVE"); + if (DELETED(next)) printf(" ***DELETED***"); + printf("}\n"); + } else { - printf("0x%08x { dummy ", el->hashnr); + /*printf("0x%08x { dummy }\n", el->hashnr);*/ DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X); } - if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); - if (el->flags & UPGRADED) printf(" UPGRADED"); - printf("}\n"); - el= (LOCK *)next; + el= PTR(next); } } #endif diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h index a3c96786935..9edd79eb7f1 100644 --- a/storage/maria/lockman.h +++ b/storage/maria/lockman.h @@ -18,7 +18,10 @@ #define _lockman_h /* - N - "no lock", not a lock, used sometimes to simplify the code + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code S - Shared X - eXclusive IS - Intention Shared @@ -35,8 +38,8 @@ struct lockman_lock; typedef struct st_lock_owner LOCK_OWNER; struct st_lock_owner { - LF_PINS *pins; - struct lockman_lock *all_locks; + LF_PINS *pins; /* must be allocated from lockman's pinbox */ + struct lockman_lock *all_locks; /* a LIFO */ LOCK_OWNER *waiting_for; pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 49f49a3e26b..ecabf100cb8 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -128,6 +128,11 @@ static void set_short_trid(TRN *trn) trn->locks.loid= i; } +/* + DESCRIPTION + start a new transaction, allocate and initialize transaction object + mutex and cond will be used for lock waits +*/ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) { TRN *trn; @@ -148,6 +153,7 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) trnman_active_transactions++; trn= pool; + /* popping an element from a stack */ my_atomic_rwlock_wrlock(&LOCK_pool); while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, (void *)trn->next)) @@ -213,9 +219,12 @@ void trnman_end_trn(TRN *trn, my_bool commit) LF_PINS *pins= trn->pins; pthread_mutex_lock(&LOCK_trn_list); + + /* remove from active list */ trn->next->prev= trn->prev; trn->prev->next= trn->next; + /* if this transaction was the oldest - clean up committed list */ if (trn->prev == &active_list_min) { TRN *t; @@ -232,6 +241,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) } } + /* add transaction to the committed list (for read-from relations) */ if (commit && active_list_min.next != &active_list_max) { trn->commit_trid= global_trid_generator; @@ -243,7 +253,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) res= lf_hash_insert(&trid_to_trn, pins, &trn); DBUG_ASSERT(res == 0); } - else + else /* or free it right away */ { trn->next= free_me; free_me= trn; @@ -251,6 +261,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) trnman_active_transactions--; pthread_mutex_unlock(&LOCK_trn_list); + /* the rest is done outside of a critical section */ lockman_release_locks(&maria_lockman, &trn->locks); trn->locks.mutex= 0; trn->locks.cond= 0; @@ -258,7 +269,6 @@ void trnman_end_trn(TRN *trn, my_bool commit) my_atomic_storeptr((void **)&short_trid_to_trn[trn->locks.loid], 0); my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); - while (free_me) // XXX send them to the purge thread { int res; @@ -288,7 +298,7 @@ void trnman_free_trn(TRN *trn) do { /* - without volatile cast gcc-3.4.4 moved the assignment + without this volatile cast gcc-3.4.4 moved the assignment down after the loop at -O2 */ *(TRN * volatile *)&(trn->next)= tmp; @@ -317,13 +327,13 @@ my_bool trnman_can_read_from(TRN *trn, TrID trid) LF_REQUIRE_PINS(3); if (trid < trn->min_read_from) - return TRUE; + return TRUE; /* can read */ if (trid > trn->trid) - return FALSE; + return FALSE; /* cannot read */ found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); if (!found) - return FALSE; /* not in the hash of committed transactions = cannot read*/ + return FALSE; /* not in the hash of committed transactions = cannot read */ can= (*found)->commit_trid < trn->trid; lf_unpin(trn->pins, 2); diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index 8b62ccfe094..638078fea65 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -14,7 +14,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -//#define EXTRA_VERBOSE +#undef EXTRA_VERBOSE #include @@ -24,7 +24,7 @@ #include #include "../lockman.h" -#define Nlos 10 +#define Nlos 100 LOCK_OWNER loarray[Nlos]; pthread_mutex_t mutexes[Nlos]; pthread_cond_t conds[Nlos]; @@ -51,8 +51,7 @@ LOCK_OWNER *loid2lo(uint16 loid) #define lock_ok_a(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK) #define lock_ok_i(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) #define lock_ok_l(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) -#define lock_conflict(O,R,L) test_lock(O,R,L,"cannot ",DIDNT_GET_THE_LOCK); \ - unlock_all(O) +#define lock_conflict(O,R,L) test_lock(O,R,L,"cannot ",DIDNT_GET_THE_LOCK); void test_lockman_simple() { @@ -64,7 +63,8 @@ void test_lockman_simple() lock_ok_a(1, 1, X); lock_ok_i(2, 2, IX); /* failures */ - lock_conflict(2,1,X); /* this removes all locks of lo2 */ + lock_conflict(2,1,X); + unlock_all(2); lock_ok_a(1,2,S); lock_ok_a(1,2,IS); lock_ok_a(1,2,LS); @@ -72,8 +72,36 @@ void test_lockman_simple() lock_ok_a(2,3,LS); lock_ok_i(1,3,IX); lock_ok_l(2,3,IS); - lockman_release_locks(&lockman, loid2lo(1)); - lockman_release_locks(&lockman, loid2lo(2)); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1,1,IX); + lock_conflict(2,1,S); + lock_ok_a(1,1,LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1,1,IX); + lock_ok_a(2,1,LS); + lock_ok_a(1,1,LS); + lock_ok_i(1,1,IX); + lock_ok_i(3,1,IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1,4,IS); + lock_ok_i(2,4,IS); + lock_ok_i(3,4,IS); + lock_ok_a(3,4,LS); + lock_ok_i(4,4,IS); + lock_conflict(4,4,IX); + lock_conflict(2,4,IX); + lock_ok_a(1,4,LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); } @@ -82,11 +110,13 @@ pthread_mutex_t rt_mutex; pthread_cond_t rt_cond; int rt_num_threads; int litmus; +int thread_number= 0, timeouts=0; void run_test(const char *test, pthread_handler handler, int n, int m) { pthread_t t; ulonglong now= my_getsystime(); + thread_number= timeouts= 0; litmus= 0; diag("Testing %s with %d threads, %d iterations... ", test, n, m); @@ -100,13 +130,12 @@ void run_test(const char *test, pthread_handler handler, int n, int m) ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); } -int thread_number= 0, timeouts=0; -#define Nrows 1000 -#define Ntables 10 -#define TABLE_LOCK_RATIO 10 +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; enum lock_type lock_array[6]={S,X,LS,LX,IS,IX}; char *lock2str[6]={"S","X","LS","LX","IS","IX"}; -char *res2str[6]={ +char *res2str[4]={ "DIDN'T GET THE LOCK", "GOT THE LOCK", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", @@ -128,10 +157,11 @@ pthread_handler_t test_lockman(void *arg) row= x % Nrows + Ntables; table= row % Ntables; locklevel= (x/Nrows) & 3; - if ((x/Nrows/4) % TABLE_LOCK_RATIO == 0) + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) { /* table lock */ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]); - DIAG(("loid=%2d, table %d lock %s, res=%s", loid, table, lock2str[locklevel], res2str[res])); + DIAG(("loid=%2d, table %d lock %s, res=%s", loid, table, + lock2str[locklevel], res2str[res])); if (res == DIDNT_GET_THE_LOCK) { lockman_release_locks(&lockman, lo); @@ -145,7 +175,8 @@ pthread_handler_t test_lockman(void *arg) { /* row lock */ locklevel&= 1; res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]); - DIAG(("loid=%2d, row %d lock %s, res=%s", loid, row, lock2str[locklevel+4], res2str[res])); + DIAG(("loid=%2d, row %d lock %s, res=%s", loid, row, + lock2str[locklevel+4], res2str[res])); switch (res) { case DIDNT_GET_THE_LOCK: @@ -159,7 +190,8 @@ pthread_handler_t test_lockman(void *arg) /* not implemented, so take a regular lock */ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); - DIAG(("loid=%2d, ROW %d lock %s, res=%s", loid, row, lock2str[locklevel], res2str[res])); + DIAG(("loid=%2d, ROW %d lock %s, res=%s", loid, row, + lock2str[locklevel], res2str[res])); if (res == DIDNT_GET_THE_LOCK) { lockman_release_locks(&lockman, lo); @@ -196,7 +228,7 @@ int main() my_init(); - plan(14); + plan(31); if (my_atomic_initialize()) return exit_status(); @@ -222,11 +254,21 @@ int main() test_lockman_simple(); -#define CYCLES 100 +#define CYCLES 1000 #define THREADS Nlos /* don't change this line */ + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; run_test("lockman", test_lockman, THREADS,CYCLES); + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("lockman", test_lockman, THREADS,10000); + for (i= 0; i < Nlos; i++) { lockman_release_locks(&lockman, &loarray[i]); @@ -235,7 +277,12 @@ int main() lf_pinbox_put_pins(loarray[i].pins); } - lockman_destroy(&lockman); + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g", ((double)now)/1e7); + } pthread_mutex_destroy(&rt_mutex); pthread_cond_destroy(&rt_cond); diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 881f80a66f8..e2177b94720 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -154,7 +154,7 @@ pthread_handler_t test_lf_pinbox(void *arg) typedef union { int32 data; - void *not_used; /* to guarantee sizeof(TLA) >= sizeof(void *) */ + void *not_used; } TLA; pthread_handler_t test_lf_alloc(void *arg) @@ -294,7 +294,7 @@ int main() pthread_mutex_init(&mutex, 0); pthread_cond_init(&cond, 0); my_atomic_rwlock_init(&rwl); - lf_alloc_init(&lf_allocator, sizeof(TLA)); + lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used)); lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0, &my_charset_bin); From aea73116c14da8e946422de8e49c93acc85817d0 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Oct 2006 12:21:30 +0200 Subject: [PATCH 40/95] post-review fixes (style) include/lf.h: comments --- include/lf.h | 43 +++++---- storage/maria/lockman.c | 8 +- storage/maria/lockman.h | 2 +- storage/maria/trnman.c | 147 ++++++++++++++++++----------- storage/maria/trnman.h | 2 +- storage/maria/unittest/lockman-t.c | 97 ++++++++++--------- storage/maria/unittest/trnman-t.c | 31 +++--- 7 files changed, 186 insertions(+), 144 deletions(-) diff --git a/include/lf.h b/include/lf.h index 45c5f109e1c..3a3b5b28ade 100644 --- a/include/lf.h +++ b/include/lf.h @@ -1,9 +1,18 @@ +/* Copyright (C) 2006 MySQL AB -/* - TODO - 1. copyright - 6. reduce the number of memory barriers -*/ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _lf_h #define _lf_h @@ -11,7 +20,8 @@ #include /* - Generic helpers + Helpers to define both func() and _func(), where + func() is a _func() protected by my_atomic_rwlock_wrlock() */ #define lock_wrap(f,t,proto_args, args, lock) \ @@ -49,7 +59,7 @@ static inline void f proto_args \ } /* - dynamic array + wait-free dynamic array, see lf_dynarray.c 4 levels of 256 elements each mean 4311810304 elements in an array - it should be enough for a while @@ -68,14 +78,9 @@ typedef int (*lf_dynarray_func)(void *, void *); void lf_dynarray_init(LF_DYNARRAY *array, uint element_size); void lf_dynarray_destroy(LF_DYNARRAY *array); -nolock_wrap(lf_dynarray_nr, int, - (LF_DYNARRAY *array, void *el), - (array,el)); - nolock_wrap(lf_dynarray_value, void *, (LF_DYNARRAY *array, uint idx), (array,idx)); - lock_wrap(lf_dynarray_lvalue, void *, (LF_DYNARRAY *array, uint idx), (array,idx), @@ -85,7 +90,7 @@ nolock_wrap(lf_dynarray_iterate, int, (array,func,arg)); /* - pin manager for memory allocator + pin manager for memory allocator, lf_alloc-pin.c */ #define LF_PINBOX_PINS 4 @@ -102,13 +107,13 @@ typedef struct { uint32 volatile pins_in_stack; /* number of elements in array */ } LF_PINBOX; -/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ typedef struct { void * volatile pin[LF_PINBOX_PINS]; LF_PINBOX *pinbox; void *purgatory; uint32 purgatory_count; uint32 volatile link; +/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ char pad[128-sizeof(uint32)*2 -sizeof(void *)*(LF_PINBOX_PINS+2)]; } LF_PINS; @@ -160,19 +165,13 @@ lock_wrap_void(lf_pinbox_put_pins, (LF_PINS *pins), (pins), &pins->pinbox->pinstack.lock); -#if 0 -lock_wrap_void(lf_pinbox_real_free, - (LF_PINS *pins), - (pins), - &pins->pinbox->pinstack.lock); -#endif lock_wrap_void(lf_pinbox_free, (LF_PINS *pins, void *addr), (pins,addr), &pins->pinbox->pinstack.lock); /* - memory allocator + memory allocator, lf_alloc-pin.c */ typedef struct st_lf_allocator { @@ -199,7 +198,7 @@ lock_wrap(lf_alloc_new, void *, &pins->pinbox->pinstack.lock); /* - extendible hash + extendible hash, lf_hash.c */ #include diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 1712f6f2221..b37c31fb52c 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -5,7 +5,7 @@ different characteristics. long lists, few distinct resources - slow to scan, [possibly] high retry rate */ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -272,7 +272,7 @@ retry: _lf_unpin(pins, 3); do { cursor->curr= PTR(*cursor->prev); - _lf_pin(pins,1,cursor->curr); + _lf_pin(pins, 1, cursor->curr); } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); for (;;) { @@ -507,7 +507,7 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout) { - lf_alloc_init(&lm->alloc,sizeof(LOCK), offsetof(LOCK,lonext)); + lf_alloc_init(&lm->alloc, sizeof(LOCK), offsetof(LOCK, lonext)); lf_dynarray_init(&lm->array, sizeof(LOCK **)); lm->size= 1; lm->count= 0; @@ -744,7 +744,7 @@ static char *lock2str[]= void print_lockhash(LOCKMAN *lm) { LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); - printf("hash: size=%u count=%u\n", lm->size, lm->count); + printf("hash: size:%u count:%u\n", lm->size, lm->count); while (el) { intptr next= el->link; diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h index 9edd79eb7f1..6577a5e80fc 100644 --- a/storage/maria/lockman.h +++ b/storage/maria/lockman.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index ecabf100cb8..95e5be4284a 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,21 +20,35 @@ #include #include "trnman.h" +/* status variables */ uint trnman_active_transactions, trnman_allocated_transactions; -static TRN active_list_min, active_list_max, - committed_list_min, committed_list_max, *pool; +/* list of active transactions in the trid order */ +static TRN active_list_min, active_list_max; +/* list of committed transactions in the trid order */ +static TRN committed_list_min, committed_list_max; -static pthread_mutex_t LOCK_trn_list; +/* a counter, used to generate transaction ids */ static TrID global_trid_generator; -static LF_HASH trid_to_trn; -static LOCKMAN maria_lockman; +/* the mutex for everything above */ +static pthread_mutex_t LOCK_trn_list; -static TRN **short_trid_to_trn; +/* LIFO pool of unused TRN structured for reuse */ +static TRN *pool; + +/* a hash for committed transactions that maps trid to a TRN structure */ +static LF_HASH trid_to_committed_trn; + +/* an array that maps short_trid of an active transaction to a TRN structure */ +static TRN **short_trid_to_active_trn; + +/* locks for short_trid_to_active_trn and pool */ static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool; -static byte *trn_get_hash_key(const byte *trn,uint* len, my_bool unused) +static LOCKMAN maria_lockman; + +static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused) { *len= sizeof(TrID); return (byte *) & ((*((TRN **)trn))->trid); @@ -44,7 +58,7 @@ static LOCK_OWNER *trnman_short_trid_to_TRN(uint16 short_trid) { TRN *trn; my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); - trn= my_atomic_loadptr((void **)&short_trid_to_trn[short_trid]); + trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]); my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); return (LOCK_OWNER *)trn; } @@ -52,39 +66,56 @@ static LOCK_OWNER *trnman_short_trid_to_TRN(uint16 short_trid) int trnman_init() { pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); + + /* + Initialize lists. + active_list_max.min_read_from must be larger than any trid, + so that when an active list is empty we would could free + all committed list. + And committed_list_max itself can not be freed so + committed_list_max.commit_trid must not be smaller that + active_list_max.min_read_from + */ + active_list_max.trid= active_list_min.trid= 0; active_list_max.min_read_from= ~0; active_list_max.next= active_list_min.prev= 0; active_list_max.prev= &active_list_min; active_list_min.next= &active_list_max; - trnman_active_transactions= 0; - trnman_allocated_transactions= 0; committed_list_max.commit_trid= ~0; committed_list_max.next= committed_list_min.prev= 0; committed_list_max.prev= &committed_list_min; committed_list_min.next= &committed_list_max; + trnman_active_transactions= 0; + trnman_allocated_transactions= 0; + pool= 0; - global_trid_generator= 0; /* set later by recovery code */ - lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE, + global_trid_generator= 0; /* set later by the recovery code */ + lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE, 0, 0, trn_get_hash_key, 0); my_atomic_rwlock_init(&LOCK_short_trid_to_trn); my_atomic_rwlock_init(&LOCK_pool); - short_trid_to_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), + short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), MYF(MY_WME|MY_ZEROFILL)); - if (!short_trid_to_trn) + if (!short_trid_to_active_trn) return 1; - short_trid_to_trn--; /* min short_trid is 1 */ + short_trid_to_active_trn--; /* min short_trid is 1 */ lockman_init(&maria_lockman, &trnman_short_trid_to_TRN, 10000); return 0; } +/* + NOTE + this could only be called in the "idle" state - no transaction can be + running. See asserts below. +*/ int trnman_destroy() { - DBUG_ASSERT(trid_to_trn.count == 0); + DBUG_ASSERT(trid_to_committed_trn.count == 0); DBUG_ASSERT(trnman_active_transactions == 0); DBUG_ASSERT(active_list_max.prev == &active_list_min); DBUG_ASSERT(active_list_min.next == &active_list_max); @@ -98,14 +129,20 @@ int trnman_destroy() DBUG_ASSERT(trn->locks.cond == 0); my_free((void *)trn, MYF(0)); } - lf_hash_destroy(&trid_to_trn); + lf_hash_destroy(&trid_to_committed_trn); pthread_mutex_destroy(&LOCK_trn_list); my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn); my_atomic_rwlock_destroy(&LOCK_pool); - my_free((void *)(short_trid_to_trn+1), MYF(0)); + my_free((void *)(short_trid_to_active_trn+1), MYF(0)); lockman_destroy(&maria_lockman); } +/* + NOTE + TrID is limited to 6 bytes. Initial value of the generator + is set by the recovery code - being read from the last checkpoint + (or 1 on a first run). +*/ static TrID new_trid() { DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); @@ -120,8 +157,8 @@ static void set_short_trid(TRN *trn) for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */ { void *tmp= NULL; - if (short_trid_to_trn[i] == NULL && - my_atomic_casptr((void **)&short_trid_to_trn[i], &tmp, trn)) + if (short_trid_to_active_trn[i] == NULL && + my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) break; } my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn); @@ -138,38 +175,37 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) TRN *trn; /* - see trnman_end_trn to see why we need a mutex here - - and as we have a mutex, we can as well do everything - under it - allocating a TRN, incrementing trnman_active_transactions, - setting trn->min_read_from. + we have a mutex, to do simple things under it - allocate a TRN, + increment trnman_active_transactions, set trn->min_read_from. Note that all the above is fast. generating short_trid may be slow, - as it involves scanning a big array - so it's still done - outside of the mutex. + as it involves scanning a large array - so it's done outside of the + mutex. */ pthread_mutex_lock(&LOCK_trn_list); - trnman_active_transactions++; + /* Allocating a new TRN structure */ trn= pool; - /* popping an element from a stack */ + /* Popping an unused TRN from the pool */ my_atomic_rwlock_wrlock(&LOCK_pool); while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, (void *)trn->next)) /* no-op */; my_atomic_rwlock_wrunlock(&LOCK_pool); + /* Nothing in the pool ? Allocate a new one */ if (!trn) { trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME)); - if (!trn) + if (unlikely(!trn)) { pthread_mutex_unlock(&LOCK_trn_list); return 0; } trnman_allocated_transactions++; } + trnman_active_transactions++; trn->min_read_from= active_list_min.next->trid; @@ -181,36 +217,31 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) active_list_max.prev= trn->prev->next= trn; pthread_mutex_unlock(&LOCK_trn_list); - trn->pins= lf_hash_get_pins(&trid_to_trn); + trn->pins= lf_hash_get_pins(&trid_to_committed_trn); - if (!trn->min_read_from) + if (unlikely(!trn->min_read_from)) trn->min_read_from= trn->trid; + trn->commit_trid= 0; + trn->locks.mutex= mutex; trn->locks.cond= cond; - trn->commit_trid= 0; trn->locks.waiting_for= 0; trn->locks.all_locks= 0; trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc); - set_short_trid(trn); /* this must be the last! */ + /* + only after the following function TRN is considered initialized, + so it must be done the last + */ + set_short_trid(trn); return trn; } /* - remove a trn from the active list, - move to committed list, - set commit_trid - - TODO - integrate with log manager. That means: - a common "commit" mutex - forcing the log and setting commit_trid - must be done atomically (QQ how the heck it could be done with - group commit ???) XXX - why did I think it must be done atomically ? - - trid_to_trn, active_list_*, and committed_list_* can be - updated asyncronously. + remove a trn from the active list. + if necessary - move to committed list and set commit_trid */ void trnman_end_trn(TRN *trn, my_bool commit) { @@ -224,7 +255,11 @@ void trnman_end_trn(TRN *trn, my_bool commit) trn->next->prev= trn->prev; trn->prev->next= trn->next; - /* if this transaction was the oldest - clean up committed list */ + /* + if trn was the oldest active transaction, now that it goes away there + may be committed transactions in the list which no active transaction + needs to bother about - clean up the committed list + */ if (trn->prev == &active_list_min) { TRN *t; @@ -232,6 +267,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) t->commit_trid < active_list_min.next->min_read_from; t= t->next) /* no-op */; + /* found transactions committed before the oldest active one */ if (t != committed_list_min.next) { free_me= committed_list_min.next; @@ -241,7 +277,10 @@ void trnman_end_trn(TRN *trn, my_bool commit) } } - /* add transaction to the committed list (for read-from relations) */ + /* + if transaction is committed and it was not the only active transaction - + add it to the committed list (which is used for read-from relation) + */ if (commit && active_list_min.next != &active_list_max) { trn->commit_trid= global_trid_generator; @@ -250,10 +289,10 @@ void trnman_end_trn(TRN *trn, my_bool commit) trn->prev= committed_list_max.prev; committed_list_max.prev= trn->prev->next= trn; - res= lf_hash_insert(&trid_to_trn, pins, &trn); + res= lf_hash_insert(&trid_to_committed_trn, pins, &trn); DBUG_ASSERT(res == 0); } - else /* or free it right away */ + else /* otherwise free it right away */ { trn->next= free_me; free_me= trn; @@ -266,7 +305,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) trn->locks.mutex= 0; trn->locks.cond= 0; my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); - my_atomic_storeptr((void **)&short_trid_to_trn[trn->locks.loid], 0); + my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->locks.loid], 0); my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); while (free_me) // XXX send them to the purge thread @@ -275,7 +314,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) TRN *t= free_me; free_me= free_me->next; - res= lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID)); + res= lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID)); trnman_free_trn(t); } @@ -331,7 +370,7 @@ my_bool trnman_can_read_from(TRN *trn, TrID trid) if (trid > trn->trid) return FALSE; /* cannot read */ - found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); + found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid)); if (!found) return FALSE; /* not in the hash of committed transactions = cannot read */ diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h index 9470678f3b2..c059947f35c 100644 --- a/storage/maria/trnman.h +++ b/storage/maria/trnman.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index 638078fea65..ad382a360fb 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -42,16 +42,20 @@ LOCK_OWNER *loid2lo(uint16 loid) return loarray+loid-1; } -#define unlock_all(O) diag("lo" #O "> release all locks"); \ +#define unlock_all(O) diag("lo" #O "> release all locks"); \ lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman) -#define test_lock(O, R, L, S, RES) \ - ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ - "lo" #O "> " S " lock resource " #R " with " #L "-lock"); \ +#define test_lock(O, R, L, S, RES) \ + ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ + "lo" #O "> " S " lock resource " #R " with " #L "-lock"); \ print_lockhash(&lockman) -#define lock_ok_a(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK) -#define lock_ok_i(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) -#define lock_ok_l(O,R,L) test_lock(O,R,L,"",GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) -#define lock_conflict(O,R,L) test_lock(O,R,L,"cannot ",DIDNT_GET_THE_LOCK); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); void test_lockman_simple() { @@ -63,41 +67,41 @@ void test_lockman_simple() lock_ok_a(1, 1, X); lock_ok_i(2, 2, IX); /* failures */ - lock_conflict(2,1,X); + lock_conflict(2, 1, X); unlock_all(2); - lock_ok_a(1,2,S); - lock_ok_a(1,2,IS); - lock_ok_a(1,2,LS); - lock_ok_i(1,3,IX); - lock_ok_a(2,3,LS); - lock_ok_i(1,3,IX); - lock_ok_l(2,3,IS); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); unlock_all(1); unlock_all(2); - lock_ok_i(1,1,IX); - lock_conflict(2,1,S); - lock_ok_a(1,1,LS); + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); unlock_all(1); unlock_all(2); - lock_ok_i(1,1,IX); - lock_ok_a(2,1,LS); - lock_ok_a(1,1,LS); - lock_ok_i(1,1,IX); - lock_ok_i(3,1,IS); + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); unlock_all(1); unlock_all(2); unlock_all(3); - lock_ok_i(1,4,IS); - lock_ok_i(2,4,IS); - lock_ok_i(3,4,IS); - lock_ok_a(3,4,LS); - lock_ok_i(4,4,IS); - lock_conflict(4,4,IX); - lock_conflict(2,4,IX); - lock_ok_a(1,4,LS); + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); unlock_all(1); unlock_all(2); unlock_all(3); @@ -110,7 +114,7 @@ pthread_mutex_t rt_mutex; pthread_cond_t rt_cond; int rt_num_threads; int litmus; -int thread_number= 0, timeouts=0; +int thread_number= 0, timeouts= 0; void run_test(const char *test, pthread_handler handler, int n, int m) { pthread_t t; @@ -121,7 +125,8 @@ void run_test(const char *test, pthread_handler handler, int n, int m) diag("Testing %s with %d threads, %d iterations... ", test, n, m); for (rt_num_threads= n ; n ; n--) - pthread_create(&t, &rt_attr, handler, &m); + if (pthread_create(&t, &rt_attr, handler, &m)) + abort(); pthread_mutex_lock(&rt_mutex); while (rt_num_threads) pthread_cond_wait(&rt_cond, &rt_mutex); @@ -133,9 +138,9 @@ void run_test(const char *test, pthread_handler handler, int n, int m) int Nrows= 100; int Ntables= 10; int table_lock_ratio= 10; -enum lock_type lock_array[6]={S,X,LS,LX,IS,IX}; -char *lock2str[6]={"S","X","LS","LX","IS","IX"}; -char *res2str[4]={ +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[4]= { "DIDN'T GET THE LOCK", "GOT THE LOCK", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", @@ -160,12 +165,12 @@ pthread_handler_t test_lockman(void *arg) if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) { /* table lock */ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]); - DIAG(("loid=%2d, table %d lock %s, res=%s", loid, table, + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, lock2str[locklevel], res2str[res])); if (res == DIDNT_GET_THE_LOCK) { lockman_release_locks(&lockman, lo); - DIAG(("loid=%2d, release all locks", loid)); + DIAG(("loid %2d, release all locks", loid)); timeout++; continue; } @@ -175,13 +180,13 @@ pthread_handler_t test_lockman(void *arg) { /* row lock */ locklevel&= 1; res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]); - DIAG(("loid=%2d, row %d lock %s, res=%s", loid, row, + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, lock2str[locklevel+4], res2str[res])); switch (res) { case DIDNT_GET_THE_LOCK: lockman_release_locks(&lockman, lo); - DIAG(("loid=%2d, release all locks", loid)); + DIAG(("loid %2d, release all locks", loid)); timeout++; continue; case GOT_THE_LOCK: @@ -190,12 +195,12 @@ pthread_handler_t test_lockman(void *arg) /* not implemented, so take a regular lock */ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); - DIAG(("loid=%2d, ROW %d lock %s, res=%s", loid, row, + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, lock2str[locklevel], res2str[res])); if (res == DIDNT_GET_THE_LOCK) { lockman_release_locks(&lockman, lo); - DIAG(("loid=%2d, release all locks", loid)); + DIAG(("loid %2d, release all locks", loid)); timeout++; continue; } @@ -234,7 +239,7 @@ int main() return exit_status(); pthread_attr_init(&rt_attr); - pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); + pthread_attr_setdetachstate(&rt_attr, PTHREAD_CREATE_DETACHED); pthread_mutex_init(&rt_mutex, 0); pthread_cond_init(&rt_cond, 0); @@ -261,13 +266,13 @@ int main() Nrows= 100; Ntables= 10; table_lock_ratio= 10; - run_test("lockman", test_lockman, THREADS,CYCLES); + run_test("lockman", test_lockman, THREADS, CYCLES); /* "real-life" simulation - many rows, no table locks */ Nrows= 1000000; Ntables= 10; table_lock_ratio= 0; - run_test("lockman", test_lockman, THREADS,10000); + run_test("lockman", test_lockman, THREADS, 10000); for (i= 0; i < Nlos; i++) { diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 6d4b48c6d3d..7bde9f5f720 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -41,7 +41,7 @@ pthread_handler_t test_trnman(void *arg) pthread_mutex_t mutexes[MAX_ITER]; pthread_cond_t conds[MAX_ITER]; - for (i=0; i < MAX_ITER; i++) + for (i= 0; i < MAX_ITER; i++) { pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); pthread_cond_init(&conds[i], 0); @@ -60,7 +60,7 @@ pthread_handler_t test_trnman(void *arg) } } - for (i=0; i < MAX_ITER; i++) + for (i= 0; i < MAX_ITER; i++) { pthread_mutex_destroy(&mutexes[i]); pthread_cond_destroy(&conds[i]); @@ -84,7 +84,8 @@ void run_test(const char *test, pthread_handler handler, int n, int m) diag("Testing %s with %d threads, %d iterations... ", test, n, m); for (rt_num_threads= n ; n ; n--) - pthread_create(&t, &rt_attr, handler, &m); + if (pthread_create(&t, &rt_attr, handler, &m)) + abort(); pthread_mutex_lock(&rt_mutex); while (rt_num_threads) pthread_cond_wait(&rt_cond, &rt_mutex); @@ -94,11 +95,10 @@ void run_test(const char *test, pthread_handler handler, int n, int m) } #define ok_read_from(T1, T2, RES) \ - i=trnman_can_read_from(trn[T1], trid[T2]); \ + i= trnman_can_read_from(trn[T1], trn[T2]->trid); \ ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot") #define start_transaction(T) \ - trn[T]= trnman_new_trn(&mutexes[T], &conds[T]); \ - trid[T]= trn[T]->trid + trn[T]= trnman_new_trn(&mutexes[T], &conds[T]) #define commit(T) trnman_commit_trn(trn[T]) #define abort(T) trnman_abort_trn(trn[T]) @@ -106,12 +106,11 @@ void run_test(const char *test, pthread_handler handler, int n, int m) void test_trnman_read_from() { TRN *trn[Ntrns]; - TrID trid[Ntrns]; pthread_mutex_t mutexes[Ntrns]; pthread_cond_t conds[Ntrns]; int i; - for (i=0; i < Ntrns; i++) + for (i= 0; i < Ntrns; i++) { pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); pthread_cond_init(&conds[i], 0); @@ -119,19 +118,19 @@ void test_trnman_read_from() start_transaction(0); /* start trn1 */ start_transaction(1); /* start trn2 */ - ok_read_from(1,0,0); + ok_read_from(1, 0, 0); commit(0); /* commit trn1 */ start_transaction(2); /* start trn4 */ abort(2); /* abort trn4 */ start_transaction(3); /* start trn5 */ - ok_read_from(3,0,1); - ok_read_from(3,1,0); - ok_read_from(3,2,0); + ok_read_from(3, 0, 1); + ok_read_from(3, 1, 0); + ok_read_from(3, 2, 0); commit(1); /* commit trn2 */ - ok_read_from(3,1,0); + ok_read_from(3, 1, 0); commit(3); /* commit trn5 */ - for (i=0; i < Ntrns; i++) + for (i= 0; i < Ntrns; i++) { pthread_mutex_destroy(&mutexes[i]); pthread_cond_destroy(&conds[i]); @@ -148,7 +147,7 @@ int main() return exit_status(); pthread_attr_init(&rt_attr); - pthread_attr_setdetachstate(&rt_attr,PTHREAD_CREATE_DETACHED); + pthread_attr_setdetachstate(&rt_attr, PTHREAD_CREATE_DETACHED); pthread_mutex_init(&rt_mutex, 0); pthread_cond_init(&rt_cond, 0); @@ -158,7 +157,7 @@ int main() trnman_init(); test_trnman_read_from(); - run_test("trnman", test_trnman, THREADS,CYCLES); + run_test("trnman", test_trnman, THREADS, CYCLES); diag("mallocs: %d", trnman_allocated_transactions); { From a79868ae99567e974373d29d631db552f998ccc7 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Oct 2006 13:33:49 +0200 Subject: [PATCH 41/95] comments --- mysys/lf_alloc-pin.c | 197 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 161 insertions(+), 36 deletions(-) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 842c9690463..ac55185864a 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -17,8 +17,63 @@ /* wait-free concurrent allocator based on pinning addresses - TODO test with more than 256 threads - TODO test w/o alloca + It works as follows: every thread (strictly speaking - every CPU, but it's + too difficult to do) has a small array of pointers. They're called "pins". + Before using an object its address must be stored in this array (pinned). + When an object is no longer necessary its address must be removed from + this array (unpinned). When a thread wants to free() an object it + scans all pins of all threads to see if somebody has this object pinned. + If yes - the object is not freed (but stored in a purgatory). + To reduce the cost of a single free() pins are not scanned on every free() + but only added to (thread-local) purgatory. On every LF_PURGATORY_SIZE + free() purgatory is scanned and all unpinned objects are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + 1. Let's assume that PTR is a shared pointer to an object. Shared means + that any thread may modify it anytime to point to a different object and + free the old object. Later the freed object may be potentially allocated + by another thread. If we're unlucky that another thread may set PTR to + point to this object again. This is ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop is ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. It is not guaranteed though + that LOCAL_PTR == PTR + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list: + while () + { + pin(CUR, 0); + do + { + NEXT=CUR->next; + pin(NEXT, 1); + } while (NEXT != CUR->next); + ... + ... + pin(CUR, 1); + CUR=NEXT; + } + which keeps CUR address constantly pinned), note than pins may be copied + only upwards (!!!), that is pin N to pin M > N. + 7. Don't keep the object pinned longer than necessary - the number of pins + you have is limited (and small), keeping an object pinned prevents its + reuse and cause unnecessary mallocs. + + Implementation details: + Pins are given away from a "pinbox". Pinbox is stack-based allocator. + It used dynarray for storing pins, new elements are allocated by dynarray + as necessary, old are pushed in the stack for reuse. ABA is solved by + versioning a pointer - because we use an array, a pointer to pins is 32 bit, + upper 32 bits are used for a version. */ #include @@ -29,6 +84,10 @@ static void _lf_pinbox_real_free(LF_PINS *pins); +/* + Initialize a pinbox. Must be usually called from lf_alloc_init. + See the latter for details. +*/ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, lf_pinbox_free_func *free_func,void *free_func_arg) { @@ -47,6 +106,14 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox) lf_dynarray_destroy(&pinbox->pinstack); } +/* + Get pins from a pinbox. Usually called via lf_alloc_get_pins() or + lf_hash_get_pins(). + + DESCRIPTION + get a new LF_PINS structure from a stack of unused pins, + or allocate a new one out of dynarray. +*/ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) { uint32 pins, next, top_ver; @@ -71,6 +138,14 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) return el; } +/* + Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or + lf_hash_put_pins(). + + DESCRIPTION + empty the purgatory (XXX deadlock warning below!), + push LF_PINS structure to a stack +*/ void _lf_pinbox_put_pins(LF_PINS *pins) { LF_PINBOX *pinbox= pins->pinbox; @@ -80,11 +155,11 @@ void _lf_pinbox_put_pins(LF_PINS *pins) { int i; for (i= 0; i < LF_PINBOX_PINS; i++) - assert(pins->pin[i] == 0); + DBUG_ASSERT(pins->pin[i] == 0); } #endif /* - Note - this will deadlock if other threads will wait for + XXX this will deadlock if other threads will wait for the caller to do something after _lf_pinbox_put_pins(), and they would have pinned addresses that the caller wants to free. Thus: only free pins when all work is done and nobody can wait for you!!! @@ -130,6 +205,13 @@ static int ptr_cmp(void **a, void **b) (PINS)->purgatory_count++; \ } while (0) +/* + Free an object allocated via pinbox allocator + + DESCRIPTION + add an object to purgatory. if necessary, call _lf_pinbox_real_free() + to actually free something. +*/ void _lf_pinbox_free(LF_PINS *pins, void *addr) { if (pins->purgatory_count % LF_PURGATORY_SIZE) @@ -142,6 +224,10 @@ struct st_harvester { int npins; }; +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and accumulate all pins +*/ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) { int i; @@ -159,6 +245,10 @@ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) return 0; } +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and see if addr is present there +*/ static int match_pins(LF_PINS *el, void *addr) { int i; @@ -170,6 +260,9 @@ static int match_pins(LF_PINS *el, void *addr) return 0; } +/* + Scan the purgatory as free everything that can be freed +*/ static void _lf_pinbox_real_free(LF_PINS *pins) { int npins; @@ -187,10 +280,12 @@ static void _lf_pinbox_real_free(LF_PINS *pins) addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); hv.granary= addr; hv.npins= npins; + /* scan the dynarray and accumulate all pinned addresses */ _lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)harvest_pins, &hv); npins= hv.granary-addr; + /* and sort them */ if (npins) qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); } @@ -207,7 +302,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) list= *(void **)((char *)cur+pinbox->free_ptr_offset); if (npins) { - if (addr) + if (addr) /* use binary search */ { void **a,**b,**c; for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) @@ -220,7 +315,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) if (cur == *a || cur == *b) goto found; } - else + else /* no alloca - no cookie. linear search here */ { if (_lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)match_pins, cur)) @@ -236,6 +331,10 @@ found: } } +/* + callback for _lf_pinbox_real_free to free an unpinned object - + add it back to the allocator stack +*/ static void alloc_free(void *node, LF_ALLOCATOR *allocator) { void *tmp; @@ -247,36 +346,21 @@ static void alloc_free(void *node, LF_ALLOCATOR *allocator) LF_BACKOFF); } -LF_REQUIRE_PINS(1); -void *_lf_alloc_new(LF_PINS *pins) -{ - LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); - void *node; - for (;;) - { - do - { - node= allocator->top; - _lf_pin(pins, 0, node); - } while (node != allocator->top && LF_BACKOFF); - if (!node) - { - if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) - goto ret; -#ifdef MY_LF_EXTRA_DEBUG - my_atomic_add32(&allocator->mallocs, 1); -#endif - goto ret; - } - if (my_atomic_casptr((void **)&allocator->top, - (void *)&node, *(void **)node)) - goto ret; - } -ret: - _lf_unpin(pins, 0); - return node; -} +/* lock-free memory allocator for fixed-size objects */ +LF_REQUIRE_PINS(1); + +/* + initialize lock-free allocatod. + + SYNOPSYS + allocator - + size a size of an object to allocate + free_ptr_offset an offset inside the object to a sizeof(void *) + memory that is guaranteed to be unused after + the object is put in the purgatory. Unused by ANY + thread, not only the purgatory owner. +*/ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) { lf_pinbox_init(&allocator->pinbox, free_ptr_offset, @@ -285,8 +369,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) allocator->mallocs= 0; allocator->element_size= size; DBUG_ASSERT(size >= (int)sizeof(void *)); + DBUG_ASSERT(free_ptr_offset < size); } +/* + destroy the allocator, free everything that's in it +*/ void lf_alloc_destroy(LF_ALLOCATOR *allocator) { void *el= allocator->top; @@ -301,8 +389,45 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator) } /* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. +*/ +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + void *node; + for (;;) + { + do + { + node= allocator->top; + _lf_pin(pins, 0, node); + } while (node != allocator->top && LF_BACKOFF); + if (!node) + { + if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + break; +#ifdef MY_LF_EXTRA_DEBUG + my_atomic_add32(&allocator->mallocs, 1); +#endif + break; + } + if (my_atomic_casptr((void **)&allocator->top, + (void *)&node, *(void **)node)) + break; + } + _lf_unpin(pins, 0); + return node; +} + +/* + count the number of objects in a pool. + NOTE - this is NOT thread-safe !!! + This is NOT thread-safe !!! */ uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { From fb818dd7b0be3b4facd159de14bc3d9afcbcf16e Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 20 Oct 2006 14:02:18 +0200 Subject: [PATCH 42/95] more post-review fixes - comments, renames, error checks in unit tests concurrency bug in lock manager include/my_global.h: compile-time assert macro mysys/my_atomic.c: use compile_time_assert() macro storage/maria/lockman.c: bug in concurrent lockdelete (with retries) storage/maria/trnman.c: more post-review fixes - comments, renames storage/maria/trnman.h: more post-review fixes - comments storage/maria/unittest/lockman-t.c: friendlier error checks storage/maria/unittest/trnman-t.c: friendlier error checks --- include/my_global.h | 7 ++++ mysys/my_atomic.c | 2 +- storage/maria/lockman.c | 11 ++++-- storage/maria/trnman.c | 60 +++++++++++++++++++++--------- storage/maria/trnman.h | 10 ++++- storage/maria/unittest/lockman-t.c | 14 ++++--- storage/maria/unittest/trnman-t.c | 15 +++++++- 7 files changed, 89 insertions(+), 30 deletions(-) diff --git a/include/my_global.h b/include/my_global.h index 7f57b43f89f..7f7b502e1a7 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -428,6 +428,13 @@ C_MODE_END */ #include +/* an assert that works at compile-time. only for constant expression */ +#define compile_time_assert(X) \ + do \ + { \ + char compile_time_assert[(X) ? 1 : -1]; \ + } while(0) + /* Go around some bugs in different OS and compilers */ #if defined (HPUX11) && defined(_LARGEFILE_SOURCE) #define _LARGEFILE64_SOURCE diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index fbeb3d63bef..e1af7e0328d 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -35,7 +35,7 @@ */ int my_atomic_initialize() { - char assert_the_size[sizeof(intptr) == sizeof(void *) ? 1 : -1]; + compile_time_assert(sizeof(intptr) == sizeof(void *)); /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index b37c31fb52c..937c61021cc 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -421,13 +421,14 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, } if (!(res & NEED_TO_WAIT)) node->flags|= ACTIVE; - else - node->flags&= ~ACTIVE; /* if we're retrying on REPEAT_ONCE_MORE */ node->link= (intptr)cursor.curr; DBUG_ASSERT(node->link != (intptr)node); DBUG_ASSERT(cursor.prev != &node->link); if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { res= REPEAT_ONCE_MORE; + node->flags&= ~ACTIVE; + } if (res & LOCK_UPGRADE) cursor.upgrade_from->flags|= IGNORE_ME; } @@ -496,7 +497,11 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) lockfind(head, node, &cursor, pins); } else + { res= REPEAT_ONCE_MORE; + if (cursor.upgrade_from) /* to satisfy the assert in lockfind */ + cursor.upgrade_from->flags|= IGNORE_ME; + } } while (res == REPEAT_ONCE_MORE); _lf_unpin(pins, 0); _lf_unpin(pins, 1); @@ -744,7 +749,7 @@ static char *lock2str[]= void print_lockhash(LOCKMAN *lm) { LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); - printf("hash: size:%u count:%u\n", lm->size, lm->count); + printf("hash: size %u count %u\n", lm->size, lm->count); while (el) { intptr next= el->link; diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 95e5be4284a..a227df16395 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -48,25 +48,35 @@ static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool; static LOCKMAN maria_lockman; +/* + short transaction id is at the same time its identifier + for a lock manager - its lock owner identifier (loid) +*/ +#define short_id locks.loid + +/* + NOTE + Just as short_id doubles as loid, this function doubles as + short_trid_to_LOCK_OWNER. See the compile-time assert below. +*/ +static TRN *short_trid_to_TRN(uint16 short_trid) +{ + TRN *trn; + compile_time_assert(offsetof(TRN, locks) == 0); + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + return (TRN *)trn; +} + static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused) { *len= sizeof(TrID); return (byte *) & ((*((TRN **)trn))->trid); } -static LOCK_OWNER *trnman_short_trid_to_TRN(uint16 short_trid) -{ - TRN *trn; - my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); - trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]); - my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); - return (LOCK_OWNER *)trn; -} - int trnman_init() { - pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); - /* Initialize lists. active_list_max.min_read_from must be larger than any trid, @@ -95,6 +105,7 @@ int trnman_init() global_trid_generator= 0; /* set later by the recovery code */ lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE, 0, 0, trn_get_hash_key, 0); + pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); my_atomic_rwlock_init(&LOCK_short_trid_to_trn); my_atomic_rwlock_init(&LOCK_pool); short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), @@ -103,7 +114,7 @@ int trnman_init() return 1; short_trid_to_active_trn--; /* min short_trid is 1 */ - lockman_init(&maria_lockman, &trnman_short_trid_to_TRN, 10000); + lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000); return 0; } @@ -162,7 +173,7 @@ static void set_short_trid(TRN *trn) break; } my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn); - trn->locks.loid= i; + trn->short_id= i; } /* @@ -210,7 +221,7 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) trn->min_read_from= active_list_min.next->trid; trn->trid= new_trid(); - trn->locks.loid= 0; + trn->short_id= 0; trn->next= &active_list_max; trn->prev= active_list_max.prev; @@ -242,6 +253,15 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) /* remove a trn from the active list. if necessary - move to committed list and set commit_trid + + NOTE + Locks are released at the end. In particular, after placing the + transaction in commit list, and after setting commit_trid. It's + important, as commit_trid affects visibility. Locks don't affect + anything they simply delay execution of other threads - they could be + released arbitrarily late. In other words, when locks are released it + serves as a start banner for other threads, they start to run. So + everything they may need must be ready at that point. */ void trnman_end_trn(TRN *trn, my_bool commit) { @@ -305,7 +325,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) trn->locks.mutex= 0; trn->locks.cond= 0; my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); - my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->locks.loid], 0); + my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0); my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); while (free_me) // XXX send them to the purge thread @@ -325,9 +345,13 @@ void trnman_end_trn(TRN *trn, my_bool commit) /* free a trn (add to the pool, that is) - note - we can never really free() a TRN if there's at least one - other running transaction - see, e.g., how lock waits are implemented - in lockman.c + note - we can never really free() a TRN if there's at least one other + running transaction - see, e.g., how lock waits are implemented in + lockman.c + The same is true for other lock-free data structures too. We may need some + kind of FLUSH command to reset them all - ensuring that no transactions are + running. It may even be called automatically on checkpoints if no + transactions are running. */ void trnman_free_trn(TRN *trn) { diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h index c059947f35c..eeab253ae71 100644 --- a/storage/maria/trnman.h +++ b/storage/maria/trnman.h @@ -22,9 +22,17 @@ typedef uint64 TrID; /* our TrID is 6 bytes */ typedef struct st_transaction TRN; +/* + trid - 6 byte transaction identifier. Assigned when a transaction + is created. Transaction can always be identified by its trid, + even after transaction has ended. + + short_trid - 2-byte transaction identifier, identifies a running + transaction, is reassigned when transaction ends. +*/ struct st_transaction { - LOCK_OWNER locks; + LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */ LF_PINS *pins; TrID trid, min_read_from, commit_trid; TRN *next, *prev; diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index ad382a360fb..6b5e5912fdf 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -123,16 +123,20 @@ void run_test(const char *test, pthread_handler handler, int n, int m) thread_number= timeouts= 0; litmus= 0; - diag("Testing %s with %d threads, %d iterations... ", test, n, m); + diag("Running %s with %d threads, %d iterations... ", test, n, m); for (rt_num_threads= n ; n ; n--) if (pthread_create(&t, &rt_attr, handler, &m)) - abort(); + { + diag("Could not create thread"); + litmus++; + rt_num_threads--; + } pthread_mutex_lock(&rt_mutex); while (rt_num_threads) pthread_cond_wait(&rt_cond, &rt_mutex); pthread_mutex_unlock(&rt_mutex); now= my_getsystime()-now; - ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); } int Nrows= 100; @@ -266,13 +270,13 @@ int main() Nrows= 100; Ntables= 10; table_lock_ratio= 10; - run_test("lockman", test_lockman, THREADS, CYCLES); + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); /* "real-life" simulation - many rows, no table locks */ Nrows= 1000000; Ntables= 10; table_lock_ratio= 0; - run_test("lockman", test_lockman, THREADS, 10000); + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); for (i= 0; i < Nlos; i++) { diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 7bde9f5f720..822f5cd755b 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -52,14 +52,21 @@ pthread_handler_t test_trnman(void *arg) y= x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ m-= n= x % MAX_ITER; for (i= 0; i < n; i++) + { trn[i]= trnman_new_trn(&mutexes[i], &conds[i]); + if (!trn[i]) + { + diag("trnman_new_trn() failed"); + litmus++; + } + } for (i= 0; i < n; i++) { y= (y*19 + 7) % 31; trnman_end_trn(trn[i], y & 1); } } - +end: for (i= 0; i < MAX_ITER; i++) { pthread_mutex_destroy(&mutexes[i]); @@ -85,7 +92,11 @@ void run_test(const char *test, pthread_handler handler, int n, int m) diag("Testing %s with %d threads, %d iterations... ", test, n, m); for (rt_num_threads= n ; n ; n--) if (pthread_create(&t, &rt_attr, handler, &m)) - abort(); + { + diag("Could not create thread"); + litmus++; + rt_num_threads--; + } pthread_mutex_lock(&rt_mutex); while (rt_num_threads) pthread_cond_wait(&rt_cond, &rt_mutex); From 79ff2b036c39f82a81d9890c9f4ab5539fd6755f Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 21 Oct 2006 15:53:42 +0200 Subject: [PATCH 43/95] malloc() failure is unlikely(). storage/maria/trnman.c: malloc() failure is unlikely --- storage/maria/trnman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index a227df16395..8262c57fa85 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -110,7 +110,7 @@ int trnman_init() my_atomic_rwlock_init(&LOCK_pool); short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), MYF(MY_WME|MY_ZEROFILL)); - if (!short_trid_to_active_trn) + if (unlikely(!short_trid_to_active_trn)) return 1; short_trid_to_active_trn--; /* min short_trid is 1 */ From d1852da743be3a636bba35114d1342b4b11039f0 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 21 Oct 2006 20:06:56 +0200 Subject: [PATCH 44/95] less confusing and more correct name for my_atomic_swap --- include/atomic/rwlock.h | 2 +- include/atomic/x86-gcc.h | 2 +- include/atomic/x86-msvc.h | 2 +- include/my_atomic.h | 22 +++++++++++----------- unittest/mysys/my_atomic-t.c | 8 ++++---- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h index 3d8edb9e27e..5a3ca5a434e 100644 --- a/include/atomic/rwlock.h +++ b/include/atomic/rwlock.h @@ -47,7 +47,7 @@ typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t; #endif #define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav; -#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav; +#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav; #define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a; #define make_atomic_load_body(S) ret= *a; #define make_atomic_store_body(S) *a= v; diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index 0be8fdf9244..cd84faeee31 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -43,7 +43,7 @@ #define make_atomic_add_body(S) \ asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) #endif -#define make_atomic_swap_body(S) \ +#define make_atomic_fas_body(S) \ asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) #define make_atomic_cas_body(S) \ asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h index 8f3e55aaed7..bd5a7fdd96c 100644 --- a/include/atomic/x86-msvc.h +++ b/include/atomic/x86-msvc.h @@ -43,7 +43,7 @@ _asm setz al \ _asm movzx ret, al \ } -#define make_atomic_swap_body(S) \ +#define make_atomic_fas_body(S) \ _asm { \ _asm mov reg_ ## S, v \ _asm xchg *a, reg_ ## S \ diff --git a/include/my_atomic.h b/include/my_atomic.h index 921b55e68a2..aad69295998 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -43,11 +43,11 @@ STATIC_INLINE int ## S my_atomic_add ## S( \ return v; \ } -#define make_atomic_swap(S) \ -STATIC_INLINE int ## S my_atomic_swap ## S( \ +#define make_atomic_fas(S) \ +STATIC_INLINE int ## S my_atomic_fas ## S( \ int ## S volatile *a, int ## S v) \ { \ - make_atomic_swap_body(S); \ + make_atomic_fas_body(S); \ return v; \ } @@ -80,8 +80,8 @@ STATIC_INLINE void my_atomic_store ## S( \ #define make_atomic_add(S) \ extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v); -#define make_atomic_swap(S) \ -extern int ## S my_atomic_swap ## S(int ## S volatile *a, int ## S v); +#define make_atomic_fas(S) \ +extern int ## S my_atomic_fas ## S(int ## S volatile *a, int ## S v); #define make_atomic_cas(S) \ extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set); @@ -113,10 +113,10 @@ make_atomic_store(16) make_atomic_store(32) make_atomic_store(ptr) -make_atomic_swap( 8) -make_atomic_swap(16) -make_atomic_swap(32) -make_atomic_swap(ptr) +make_atomic_fas( 8) +make_atomic_fas(16) +make_atomic_fas(32) +make_atomic_fas(ptr) #ifdef _atomic_h_cleanup_ #include _atomic_h_cleanup_ @@ -127,12 +127,12 @@ make_atomic_swap(ptr) #undef make_atomic_cas #undef make_atomic_load #undef make_atomic_store -#undef make_atomic_swap +#undef make_atomic_fas #undef make_atomic_add_body #undef make_atomic_cas_body #undef make_atomic_load_body #undef make_atomic_store_body -#undef make_atomic_swap_body +#undef make_atomic_fas_body #undef intptr #ifndef LF_BACKOFF diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index e2177b94720..a3b2241f5ed 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -62,7 +62,7 @@ pthread_handler_t test_atomic_add_handler(void *arg) 5. subtract result from a32 must get 0 in a32 at the end */ -pthread_handler_t test_atomic_swap_handler(void *arg) +pthread_handler_t test_atomic_fas_handler(void *arg) { int m=*(int *)arg; uint32 x=my_atomic_add32(&b32, 1); @@ -72,14 +72,14 @@ pthread_handler_t test_atomic_swap_handler(void *arg) for (; m ; m--) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_swap32(&c32, x); + x=my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } if (!x) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_swap32(&c32, x); + x=my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } @@ -306,7 +306,7 @@ int main() #define THREADS 100 test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES); - test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS,CYCLES); + test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES); test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES); test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES); test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); From 32739cbbcac9b8af23b4708db21bf2854b5e27a8 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 22 Oct 2006 16:05:18 +0200 Subject: [PATCH 45/95] make sure all test threads really exit before main() - workaround for NPTL bug in some linux kernels (Bug#22320) --- storage/maria/unittest/lockman-t.c | 39 +++---- storage/maria/unittest/trnman-t.c | 36 +++--- unittest/mysys/my_atomic-t.c | 173 ++++++++++++----------------- 3 files changed, 108 insertions(+), 140 deletions(-) diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index 6b5e5912fdf..de824f05671 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -109,36 +109,41 @@ void test_lockman_simple() } -pthread_attr_t rt_attr; -pthread_mutex_t rt_mutex; -pthread_cond_t rt_cond; int rt_num_threads; int litmus; int thread_number= 0, timeouts= 0; void run_test(const char *test, pthread_handler handler, int n, int m) { - pthread_t t; + pthread_t *threads; ulonglong now= my_getsystime(); + int i; thread_number= timeouts= 0; litmus= 0; + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + diag("Running %s with %d threads, %d iterations... ", test, n, m); - for (rt_num_threads= n ; n ; n--) - if (pthread_create(&t, &rt_attr, handler, &m)) + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) { diag("Could not create thread"); - litmus++; - rt_num_threads--; + abort(); } - pthread_mutex_lock(&rt_mutex); - while (rt_num_threads) - pthread_cond_wait(&rt_cond, &rt_mutex); - pthread_mutex_unlock(&rt_mutex); + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); now= my_getsystime()-now; ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); } +pthread_mutex_t rt_mutex; int Nrows= 100; int Ntables= 10; int table_lock_ratio= 10; @@ -222,10 +227,7 @@ pthread_handler_t test_lockman(void *arg) rt_num_threads--; timeouts+= timeout; if (!rt_num_threads) - { - pthread_cond_signal(&rt_cond); diag("number of timeouts: %d", timeouts); - } pthread_mutex_unlock(&rt_mutex); return 0; @@ -236,16 +238,13 @@ int main() int i; my_init(); + pthread_mutex_init(&rt_mutex, 0); plan(31); if (my_atomic_initialize()) return exit_status(); - pthread_attr_init(&rt_attr); - pthread_attr_setdetachstate(&rt_attr, PTHREAD_CREATE_DETACHED); - pthread_mutex_init(&rt_mutex, 0); - pthread_cond_init(&rt_cond, 0); lockman_init(&lockman, &loid2lo, 50); @@ -294,8 +293,6 @@ int main() } pthread_mutex_destroy(&rt_mutex); - pthread_cond_destroy(&rt_cond); - pthread_attr_destroy(&rt_attr); my_end(0); return exit_status(); } diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 822f5cd755b..7da8202b881 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -22,9 +22,7 @@ #include #include "../trnman.h" -pthread_attr_t rt_attr; pthread_mutex_t rt_mutex; -pthread_cond_t rt_cond; int rt_num_threads; int litmus; @@ -74,8 +72,6 @@ end: } pthread_mutex_lock(&rt_mutex); rt_num_threads--; - if (!rt_num_threads) - pthread_cond_signal(&rt_cond); pthread_mutex_unlock(&rt_mutex); return 0; @@ -84,25 +80,32 @@ end: void run_test(const char *test, pthread_handler handler, int n, int m) { - pthread_t t; + pthread_t *threads; ulonglong now= my_getsystime(); + int i; litmus= 0; + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + diag("Testing %s with %d threads, %d iterations... ", test, n, m); - for (rt_num_threads= n ; n ; n--) - if (pthread_create(&t, &rt_attr, handler, &m)) + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) { diag("Could not create thread"); - litmus++; - rt_num_threads--; + abort(); } - pthread_mutex_lock(&rt_mutex); - while (rt_num_threads) - pthread_cond_wait(&rt_cond, &rt_mutex); - pthread_mutex_unlock(&rt_mutex); + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); now= my_getsystime()-now; - ok(litmus == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); } #define ok_read_from(T1, T2, RES) \ @@ -157,10 +160,7 @@ int main() if (my_atomic_initialize()) return exit_status(); - pthread_attr_init(&rt_attr); - pthread_attr_setdetachstate(&rt_attr, PTHREAD_CREATE_DETACHED); pthread_mutex_init(&rt_mutex, 0); - pthread_cond_init(&rt_cond, 0); #define CYCLES 10000 #define THREADS 10 @@ -179,8 +179,6 @@ int main() } pthread_mutex_destroy(&rt_mutex); - pthread_cond_destroy(&rt_cond); - pthread_attr_destroy(&rt_attr); my_end(0); return exit_status(); } diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index a3b2241f5ed..88fa2b03242 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -21,24 +21,19 @@ #include #include -volatile uint32 a32,b32,c32; +volatile uint32 a32,b32,c32, N; my_atomic_rwlock_t rwl; LF_ALLOCATOR lf_allocator; LF_HASH lf_hash; -pthread_attr_t thr_attr; -pthread_mutex_t mutex; -pthread_cond_t cond; -int N; - /* add and sub a random number in a loop. Must get 0 at the end */ pthread_handler_t test_atomic_add_handler(void *arg) { - int m=(*(int *)arg)/2; + int m= (*(int *)arg)/2; int32 x; - for (x=((int)(intptr)(&m)); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { - x=(x*m+0x87654321) & INT_MAX32; + x= (x*m+0x87654321) & INT_MAX32; my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, x); my_atomic_rwlock_wrunlock(&rwl); @@ -47,10 +42,6 @@ pthread_handler_t test_atomic_add_handler(void *arg) my_atomic_add32(&a32, -x); my_atomic_rwlock_wrunlock(&rwl); } - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } @@ -64,22 +55,22 @@ pthread_handler_t test_atomic_add_handler(void *arg) */ pthread_handler_t test_atomic_fas_handler(void *arg) { - int m=*(int *)arg; - uint32 x=my_atomic_add32(&b32, 1); + int m= *(int *)arg; + uint32 x= my_atomic_add32(&b32, 1); my_atomic_add32(&a32, x); for (; m ; m--) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_fas32(&c32, x); + x= my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } if (!x) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_fas32(&c32, x); + x= my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } @@ -87,10 +78,6 @@ pthread_handler_t test_atomic_fas_handler(void *arg) my_atomic_add32(&a32, -x); my_atomic_rwlock_wrunlock(&rwl); - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } @@ -101,29 +88,25 @@ pthread_handler_t test_atomic_fas_handler(void *arg) */ pthread_handler_t test_atomic_cas_handler(void *arg) { - int m=(*(int *)arg)/2, ok=0; + int m= (*(int *)arg)/2, ok= 0; int32 x, y; - for (x=((int)(intptr)(&m)); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { my_atomic_rwlock_wrlock(&rwl); - y=my_atomic_load32(&a32); + y= my_atomic_load32(&a32); my_atomic_rwlock_wrunlock(&rwl); - x=(x*m+0x87654321) & INT_MAX32; + x= (x*m+0x87654321) & INT_MAX32; do { my_atomic_rwlock_wrlock(&rwl); - ok=my_atomic_cas32(&a32, &y, y+x); + ok= my_atomic_cas32(&a32, &y, y+x); my_atomic_rwlock_wrunlock(&rwl); } while (!ok) ; do { my_atomic_rwlock_wrlock(&rwl); - ok=my_atomic_cas32(&a32, &y, y-x); + ok= my_atomic_cas32(&a32, &y, y-x); my_atomic_rwlock_wrunlock(&rwl); } while (!ok) ; } - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } @@ -132,23 +115,18 @@ pthread_handler_t test_atomic_cas_handler(void *arg) */ pthread_handler_t test_lf_pinbox(void *arg) { - int m=*(int *)arg; - int32 x=0; + int m= *(int *)arg; + int32 x= 0; LF_PINS *pins; - pins=lf_pinbox_get_pins(&lf_allocator.pinbox); + pins= lf_pinbox_get_pins(&lf_allocator.pinbox); - for (x=((int)(intptr)(&m)); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { lf_pinbox_put_pins(pins); - pins=lf_pinbox_get_pins(&lf_allocator.pinbox); + pins= lf_pinbox_get_pins(&lf_allocator.pinbox); } lf_pinbox_put_pins(pins); - pthread_mutex_lock(&mutex); - N--; - if (!N) - pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } @@ -159,122 +137,124 @@ typedef union { pthread_handler_t test_lf_alloc(void *arg) { - int m=(*(int *)arg)/2; - int32 x,y=0; + int m= (*(int *)arg)/2; + int32 x,y= 0; LF_PINS *pins; - pins=lf_alloc_get_pins(&lf_allocator); + pins= lf_alloc_get_pins(&lf_allocator); - for (x=((int)(intptr)(&m)); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { TLA *node1, *node2; - x=(x*m+0x87654321) & INT_MAX32; - node1=(TLA *)lf_alloc_new(pins); - node1->data=x; - y+=node1->data; - node1->data=0; - node2=(TLA *)lf_alloc_new(pins); - node2->data=x; - y-=node2->data; - node2->data=0; + x= (x*m+0x87654321) & INT_MAX32; + node1= (TLA *)lf_alloc_new(pins); + node1->data= x; + y+= node1->data; + node1->data= 0; + node2= (TLA *)lf_alloc_new(pins); + node2->data= x; + y-= node2->data; + node2->data= 0; lf_alloc_free(pins, node1); lf_alloc_free(pins, node2); } lf_alloc_put_pins(pins); my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, y); - my_atomic_rwlock_wrunlock(&rwl); - pthread_mutex_lock(&mutex); - N--; - if (!N) + + if (my_atomic_add32(&N, -1) == 1) { diag("%d mallocs, %d pins in stack", lf_allocator.mallocs, lf_allocator.pinbox.pins_in_stack); #ifdef MY_LF_EXTRA_DEBUG - a32|=lf_allocator.mallocs - lf_alloc_in_pool(&lf_allocator); + a32|= lf_allocator.mallocs - lf_alloc_in_pool(&lf_allocator); #endif - pthread_cond_signal(&cond); } - pthread_mutex_unlock(&mutex); + my_atomic_rwlock_wrunlock(&rwl); return 0; } #define N_TLH 1000 pthread_handler_t test_lf_hash(void *arg) { - int m=(*(int *)arg)/(2*N_TLH); - int32 x,y,z,sum=0, ins=0; + int m= (*(int *)arg)/(2*N_TLH); + int32 x,y,z,sum= 0, ins= 0; LF_PINS *pins; - pins=lf_hash_get_pins(&lf_hash); + pins= lf_hash_get_pins(&lf_hash); - for (x=((int)(intptr)(&m)); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { int i; - y=x; - for (i=0; i < N_TLH; i++) + y= x; + for (i= 0; i < N_TLH; i++) { - x=(x*(m+i)+0x87654321) & INT_MAX32; - z=(x<0) ? -x : x; + x= (x*(m+i)+0x87654321) & INT_MAX32; + z= (x<0) ? -x : x; if (lf_hash_insert(&lf_hash, pins, &z)) { - sum+=z; + sum+= z; ins++; } } - for (i=0; i < N_TLH; i++) + for (i= 0; i < N_TLH; i++) { - y=(y*(m+i)+0x87654321) & INT_MAX32; - z=(y<0) ? -y : y; + y= (y*(m+i)+0x87654321) & INT_MAX32; + z= (y<0) ? -y : y; if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z))) - sum-=z; + sum-= z; } } lf_hash_put_pins(pins); my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, sum); my_atomic_add32(&b32, ins); - my_atomic_rwlock_wrunlock(&rwl); - pthread_mutex_lock(&mutex); - N--; - if (!N) + my_atomic_add32(&a32, y); + + if (my_atomic_add32(&N, -1) == 1) { diag("%d mallocs, %d pins in stack, %d hash size, %d inserts", lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_stack, lf_hash.size, b32); - a32|=lf_hash.count; - pthread_cond_signal(&cond); + a32|= lf_hash.count; } - pthread_mutex_unlock(&mutex); + my_atomic_rwlock_wrunlock(&rwl); return 0; } void test_atomic(const char *test, pthread_handler handler, int n, int m) { - pthread_t t; - ulonglong now=my_getsystime(); + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; a32= 0; b32= 0; c32= 0; - diag("Testing %s with %d threads, %d iterations... ", test, n, m); - for (N=n ; n ; n--) + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) { - if (pthread_create(&t, &thr_attr, handler, &m) != 0) + diag("Out of memory"); + abort(); + } + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + N= n; + for (i= 0 ; i < n ; i++) + { + if (pthread_create(threads+i, 0, handler, &m) != 0) { diag("Could not create thread"); - a32= 1; - goto err; + abort(); } } - pthread_mutex_lock(&mutex); - while (N) - pthread_cond_wait(&cond, &mutex); - pthread_mutex_unlock(&mutex); - now=my_getsystime()-now; + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; err: ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32); + my_free((void *)threads, MYF(0)); } int main() @@ -289,10 +269,6 @@ int main() plan(7); ok(err == 0, "my_atomic_initialize() returned %d", err); - pthread_attr_init(&thr_attr); - pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); - pthread_mutex_init(&mutex, 0); - pthread_cond_init(&cond, 0); my_atomic_rwlock_init(&rwl); lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used)); lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0, @@ -315,9 +291,6 @@ int main() lf_hash_destroy(&lf_hash); lf_alloc_destroy(&lf_allocator); - pthread_mutex_destroy(&mutex); - pthread_cond_destroy(&cond); - pthread_attr_destroy(&thr_attr); my_atomic_rwlock_destroy(&rwl); my_end(0); return exit_status(); From bef65f33c2f075140850c98a1779e636e66a5412 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 23 Oct 2006 12:44:08 +0200 Subject: [PATCH 46/95] trnman_destroy returns void, remove unused variables storage/maria/trnman.h: trnman_destroy returns void --- storage/maria/trnman.c | 9 ++++----- storage/maria/trnman.h | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index a227df16395..c80882b7941 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -124,7 +124,7 @@ int trnman_init() this could only be called in the "idle" state - no transaction can be running. See asserts below. */ -int trnman_destroy() +void trnman_destroy() { DBUG_ASSERT(trid_to_committed_trn.count == 0); DBUG_ASSERT(trnman_active_transactions == 0); @@ -265,7 +265,6 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) */ void trnman_end_trn(TRN *trn, my_bool commit) { - int res; TRN *free_me= 0; LF_PINS *pins= trn->pins; @@ -303,8 +302,9 @@ void trnman_end_trn(TRN *trn, my_bool commit) */ if (commit && active_list_min.next != &active_list_max) { - trn->commit_trid= global_trid_generator; + int res; + trn->commit_trid= global_trid_generator; trn->next= &committed_list_max; trn->prev= committed_list_max.prev; committed_list_max.prev= trn->prev->next= trn; @@ -330,11 +330,10 @@ void trnman_end_trn(TRN *trn, my_bool commit) while (free_me) // XXX send them to the purge thread { - int res; TRN *t= free_me; free_me= free_me->next; - res= lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID)); + lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID)); trnman_free_trn(t); } diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h index eeab253ae71..409e354d423 100644 --- a/storage/maria/trnman.h +++ b/storage/maria/trnman.h @@ -44,7 +44,7 @@ struct st_transaction extern uint trnman_active_transactions, trnman_allocated_transactions; int trnman_init(void); -int trnman_destroy(void); +void trnman_destroy(void); TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond); void trnman_end_trn(TRN *trn, my_bool commit); #define trnman_commit_trn(T) trnman_end_trn(T, TRUE) From ce707d9f7fb589a1a928ccc58b34b03a9a3a48d0 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 23 Oct 2006 15:13:51 +0200 Subject: [PATCH 47/95] include/my_atomic.h: comment include/my_atomic.h: comment --- include/my_atomic.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/include/my_atomic.h b/include/my_atomic.h index aad69295998..8efad2802c0 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -14,6 +14,40 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + This header defines five atomic operations: + + my_atomic_add#(&var, what) + add 'what' to *var, and return the old value of *var + + my_atomic_fas#(&var, what) + 'Fetch And Store' + store 'what' in *var, and return the old value of *var + + my_atomic_cas#(&var, &old, new) + 'Compare And Swap' + if *var is equal to *old, then store 'new' in *var, and return TRUE + otherwise store *var in *old, and return FALSE + + my_atomic_load#(&var) + return *var + + my_atomic_store#(&var, what) + store 'what' in *var + + '#' is substituted by a size suffix - 8, 16, 32, or ptr + (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr). + + NOTE This operations are not always atomic, so they always must be + enclosed in my_atomic_rwlock_rdlock(lock)/my_atomic_rwlock_rdunlock(lock) + or my_atomic_rwlock_wrlock(lock)/my_atomic_rwlock_wrunlock(lock). + Hint: if a code block makes intensive use of atomic ops, it make sense + to take/release rwlock once for the whole block, not for every statement. + + On architectures where these operations are really atomic, rwlocks will + be optimized away. +*/ + #ifndef my_atomic_rwlock_init #define intptr void * From 7ca33ae5b592143eb773ccfb71ee76d871374b46 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 27 Oct 2006 17:09:31 +0200 Subject: [PATCH 48/95] comments, minor changes --- comments mysys/lf_alloc-pin.c: comments mysys/lf_dynarray.c: comments mysys/lf_hash.c: comments, charset-aware comparison storage/maria/trnman.c: comments storage/maria/unittest/lockman-t.c: test case for a bug unittest/mysys/my_atomic-t.c: removed mistakenly copied line --- mysys/lf_alloc-pin.c | 64 +++++++++++++------------ mysys/lf_dynarray.c | 25 +++++++++- mysys/lf_hash.c | 75 +++++++++++++++++++++++------- storage/maria/trnman.c | 11 ++++- storage/maria/unittest/lockman-t.c | 14 ++++-- unittest/mysys/my_atomic-t.c | 13 +++--- 6 files changed, 140 insertions(+), 62 deletions(-) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index ac55185864a..b96fe42311b 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,3 +1,4 @@ +// TODO multi-pinbox /* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify @@ -17,24 +18,25 @@ /* wait-free concurrent allocator based on pinning addresses - It works as follows: every thread (strictly speaking - every CPU, but it's - too difficult to do) has a small array of pointers. They're called "pins". - Before using an object its address must be stored in this array (pinned). - When an object is no longer necessary its address must be removed from - this array (unpinned). When a thread wants to free() an object it - scans all pins of all threads to see if somebody has this object pinned. - If yes - the object is not freed (but stored in a purgatory). - To reduce the cost of a single free() pins are not scanned on every free() - but only added to (thread-local) purgatory. On every LF_PURGATORY_SIZE - free() purgatory is scanned and all unpinned objects are freed. + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. Pins are used to solve ABA problem. To use pins one must obey a pinning protocol: 1. Let's assume that PTR is a shared pointer to an object. Shared means - that any thread may modify it anytime to point to a different object and - free the old object. Later the freed object may be potentially allocated - by another thread. If we're unlucky that another thread may set PTR to - point to this object again. This is ABA problem. + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. If we're unlucky that another thread may + set PTR to point to this object again. This is ABA problem. 2. Create a local pointer LOCAL_PTR. 3. Pin the PTR in a loop: do @@ -42,31 +44,31 @@ LOCAL_PTR= PTR; pin(PTR, PIN_NUMBER); } while (LOCAL_PTR != PTR) - 4. It is guaranteed that after the loop is ended, LOCAL_PTR + 4. It is guaranteed that after the loop has ended, LOCAL_PTR points to an object (or NULL, if PTR may be NULL), that will never be freed. It is not guaranteed though - that LOCAL_PTR == PTR + that LOCAL_PTR == PTR (as PTR can change any time) 5. When done working with the object, remove the pin: unpin(PIN_NUMBER) - 6. When copying pins (as in the list: + 6. When copying pins (as in the list traversing loop: + pin(CUR, 1); while () { - pin(CUR, 0); - do - { - NEXT=CUR->next; - pin(NEXT, 1); - } while (NEXT != CUR->next); + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above ... ... - pin(CUR, 1); CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] } - which keeps CUR address constantly pinned), note than pins may be copied - only upwards (!!!), that is pin N to pin M > N. - 7. Don't keep the object pinned longer than necessary - the number of pins - you have is limited (and small), keeping an object pinned prevents its - reuse and cause unnecessary mallocs. + which keeps CUR address constantly pinned), note than pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and cause unnecessary mallocs. Implementation details: Pins are given away from a "pinbox". Pinbox is stack-based allocator. @@ -85,7 +87,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins); /* - Initialize a pinbox. Must be usually called from lf_alloc_init. + Initialize a pinbox. Normally called from lf_alloc_init. See the latter for details. */ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, @@ -214,9 +216,9 @@ static int ptr_cmp(void **a, void **b) */ void _lf_pinbox_free(LF_PINS *pins, void *addr) { + add_to_purgatory(pins, addr); if (pins->purgatory_count % LF_PURGATORY_SIZE) _lf_pinbox_real_free(pins); - add_to_purgatory(pins, addr); } struct st_harvester { diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index ade1c28d51c..a7a4968ddd8 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -26,10 +26,15 @@ Every element is aligned to sizeof(element) boundary (to avoid false sharing if element is big enough). + LF_DYNARRAY is a recursive structure. On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + Actually, it's wait-free, not lock-free ;-) */ -#undef DBUG_OFF #include #include #include @@ -75,6 +80,10 @@ static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= LF_DYNARRAY_LEVEL_LENGTH }; +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. +*/ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) { void * ptr, * volatile * ptr_ptr= 0; @@ -123,6 +132,10 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) return ptr + array->size_of_element * idx; } +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) { void * ptr, * volatile * ptr_ptr= 0; @@ -157,6 +170,16 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, return 0; } +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. _lf_dynarray_iterate() calls user-supplied function on every array + from the set. It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } +*/ int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) { int i, res; diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 45b45f7531e..66ad672f345 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -29,22 +29,35 @@ LF_REQUIRE_PINS(3); +/* An element of the list */ typedef struct { - intptr volatile link; - uint32 hashnr; + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ const uchar *key; uint keylen; } LF_SLIST; +/* + a structure to pass the context (pointers two the three successive elements + in a list) from lfind to linsert/ldelete +*/ typedef struct { intptr volatile *prev; LF_SLIST *curr, *next; } CURSOR; +/* + the last bit in LF_SLIST::link is a "deleted" flag. + the helper macros below convert it to a pure pointer or a pure flag +*/ #define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) #define DELETED(V) ((V) & 1) /* + DESCRIPTION + Search for hashnr/key/keylen in the list starting from 'head' and + position the cursor. The list is ORDER BY hashnr, key + RETURN 0 - not found 1 - found @@ -53,7 +66,7 @@ typedef struct { cursor is positioned in either case pins[0..2] are used, they are NOT removed on return */ -static int lfind(LF_SLIST * volatile *head, uint32 hashnr, +static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; @@ -89,7 +102,8 @@ retry: if (cur_hashnr >= hashnr) { int r=1; - if (cur_hashnr > hashnr || (r=memcmp(cur_key, key, keylen)) >= 0) + if (cur_hashnr > hashnr || + (r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) return !r; } cursor->prev=&(cursor->curr->link); @@ -112,22 +126,26 @@ retry: } /* + DESCRIPTION + insert a 'node' in the list that starts from 'head' in the correct + position (as found by lfind) + RETURN 0 - inserted - not 0 - a pointer to a conflict (not pinned and thus unusable) + not 0 - a pointer to a duplicate (not pinned and thus unusable) NOTE it uses pins[0..2], on return all pins are removed. */ -static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, - LF_PINS *pins, uint flags) +static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) { CURSOR cursor; int res=-1; do { - if (lfind(head, node->hashnr, node->key, node->keylen, + if (lfind(head, cs, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) res=0; /* duplicate found */ @@ -147,13 +165,18 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, } /* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + RETURN 0 - ok 1 - not found + NOTE it uses pins[0..2], on return all pins are removed. */ -static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, +static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; @@ -161,7 +184,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, do { - if (!lfind(head, hashnr, key, keylen, &cursor, pins)) + if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) res= 1; else if (my_atomic_casptr((void **)&(cursor.curr->link), @@ -171,7 +194,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, (void **)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else - lfind(head, hashnr, key, keylen, &cursor, pins); + lfind(head, cs, hashnr, key, keylen, &cursor, pins); res= 0; } } while (res == -1); @@ -182,18 +205,24 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, } /* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + RETURN 0 - not found node - found + NOTE it uses pins[0..2], on return the pin[2] keeps the node found all other pins are removed. */ -static LF_SLIST *lsearch(LF_SLIST * volatile *head, uint32 hashnr, - const uchar *key, uint keylen, LF_PINS *pins) +static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, + uint32 hashnr, const uchar *key, uint keylen, + LF_PINS *pins) { CURSOR cursor; - int res=lfind(head, hashnr, key, keylen, &cursor, pins); + int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); @@ -219,6 +248,9 @@ static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) #define MAX_LOAD 1.0 static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); +/* + Initializes lf_hash, the arguments are compatible with hash_init +*/ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) @@ -254,9 +286,14 @@ void lf_hash_destroy(LF_HASH *hash) } /* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. + RETURN 0 - inserted 1 - didn't (unique key conflict) + NOTE see linsert() for pin usage notes */ @@ -275,7 +312,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) if (*el == NULL) initialize_bucket(hash, el, bucket, pins); node->hashnr=my_reverse_bits(hashnr) | 1; - if (linsert(el, node, pins, hash->flags)) + if (linsert(el, hash->charset, node, pins, hash->flags)) { _lf_alloc_free(pins, node); lf_rwunlock_by_pins(pins); @@ -305,7 +342,8 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) + if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins)) { lf_rwunlock_by_pins(pins); return 1; @@ -329,7 +367,8 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); + found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? found+1 : 0; } @@ -348,7 +387,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, dummy->hashnr=my_reverse_bits(bucket); dummy->key=dummy_key; dummy->keylen=0; - if ((cur= linsert(el, dummy, pins, 0))) + if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { my_free((void *)dummy, MYF(0)); dummy= cur; diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index c80882b7941..27f124fd1ed 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -198,7 +198,10 @@ TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) /* Allocating a new TRN structure */ trn= pool; - /* Popping an unused TRN from the pool */ + /* + Popping an unused TRN from the pool + (ABA isn't possible, we're behind a mutex + */ my_atomic_rwlock_wrlock(&LOCK_pool); while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, (void *)trn->next)) @@ -328,6 +331,12 @@ void trnman_end_trn(TRN *trn, my_bool commit) my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0); my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + /* + we, under the mutex, removed going-in-free_me transactions from the + active and committed lists, thus nobody else may see them when it scans + those lists, and thus nobody may want to free them. Now we don't + need a mutex to access free_me list + */ while (free_me) // XXX send them to the purge thread { TRN *t= free_me; diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index de824f05671..7b37c983864 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -14,7 +14,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#undef EXTRA_VERBOSE +//#define EXTRA_VERBOSE #include @@ -46,7 +46,7 @@ LOCK_OWNER *loid2lo(uint16 loid) lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman) #define test_lock(O, R, L, S, RES) \ ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ - "lo" #O "> " S " lock resource " #R " with " #L "-lock"); \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ print_lockhash(&lockman) #define lock_ok_a(O, R, L) \ test_lock(O, R, L, "", GOT_THE_LOCK) @@ -107,6 +107,12 @@ void test_lockman_simple() unlock_all(3); unlock_all(4); + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); } int rt_num_threads; @@ -240,7 +246,7 @@ int main() my_init(); pthread_mutex_init(&rt_mutex, 0); - plan(31); + plan(35); if (my_atomic_initialize()) return exit_status(); @@ -289,7 +295,7 @@ int main() ulonglong now= my_getsystime(); lockman_destroy(&lockman); now= my_getsystime()-now; - diag("lockman_destroy: %g", ((double)now)/1e7); + diag("lockman_destroy: %g secs", ((double)now)/1e7); } pthread_mutex_destroy(&rt_mutex); diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 88fa2b03242..a33e233c29a 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -209,7 +209,6 @@ pthread_handler_t test_lf_hash(void *arg) my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, sum); my_atomic_add32(&b32, ins); - my_atomic_add32(&a32, y); if (my_atomic_add32(&N, -1) == 1) { @@ -281,12 +280,12 @@ int main() #endif #define THREADS 100 - test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES); - test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES); - test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES); - test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES); - test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); - test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES); + test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES); + test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES); + test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES); + test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES); + test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); + test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10); lf_hash_destroy(&lf_hash); lf_alloc_destroy(&lf_allocator); From 887383d4e496277f79500f38ae789d2d5595476c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 30 Oct 2006 12:44:33 +0100 Subject: [PATCH 49/95] Manually importing Ingo's fix for BUG#22119 "Changing MI_KEY_BLOCK_LENGTH makes a wrong myisamchk" in the Maria tree as it is really needed to get "ma_test_all" to pass (this bug showed up in Maria first, not in MyISAM). Now ma_test_all does not have corruption messages about test2 anymore, and shows the same output as mi_test_all except that ma_test_all has this at the start: lt-maria_chk: MARIA file test1 lt-maria_chk: warning: Size of indexfile is: 8192 Should be: 16384 MARIA-table 'test1' is usable but should be fixed This was already true before importing the bugfix. Wonder if normal. NOTE: this bugfix is currently in 5.1-engines, in a few days will be in the main 5.1, then we'll merge 5.1 into Maria: this will merge the bugfix into storage/myisam, but there will be no need to apply it to storage/maria again. I just couldn't wait a few days for the 5.1-engines->5.1 merge to be allowed. mysql-test/r/maria.result: result update mysql-test/t/maria.test: test for BUG#22119 storage/maria/ma_check.c: fix for BUG#22119 --- mysql-test/r/maria.result | 45 ++++++++++++++++++++ mysql-test/t/maria.test | 44 ++++++++++++++++++++ storage/maria/ma_check.c | 88 ++++++++++++++++++++++++++++++++------- 3 files changed, 161 insertions(+), 16 deletions(-) diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 396b5da4bc1..39c7c0b65dd 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -1613,5 +1613,50 @@ create table t1 (a int not null, key key_block_size=1024 (a)); ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1 create table t1 (a int not null, key `a` key_block_size=1024 (a)); ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1 +CREATE TABLE t1 ( +c1 INT, +c2 VARCHAR(300), +KEY (c1) KEY_BLOCK_SIZE 1024, +KEY (c2) KEY_BLOCK_SIZE 8192 +); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), +(11, REPEAT('b', CEIL(RAND() * 300))), +(12, REPEAT('c', CEIL(RAND() * 300))), +(13, REPEAT('d', CEIL(RAND() * 300))), +(14, REPEAT('e', CEIL(RAND() * 300))), +(15, REPEAT('f', CEIL(RAND() * 300))), +(16, REPEAT('g', CEIL(RAND() * 300))), +(17, REPEAT('h', CEIL(RAND() * 300))), +(18, REPEAT('i', CEIL(RAND() * 300))), +(19, REPEAT('j', CEIL(RAND() * 300))), +(20, REPEAT('k', CEIL(RAND() * 300))), +(21, REPEAT('l', CEIL(RAND() * 300))), +(22, REPEAT('m', CEIL(RAND() * 300))), +(23, REPEAT('n', CEIL(RAND() * 300))), +(24, REPEAT('o', CEIL(RAND() * 300))), +(25, REPEAT('p', CEIL(RAND() * 300))), +(26, REPEAT('q', CEIL(RAND() * 300))), +(27, REPEAT('r', CEIL(RAND() * 300))), +(28, REPEAT('s', CEIL(RAND() * 300))), +(29, REPEAT('t', CEIL(RAND() * 300))), +(30, REPEAT('u', CEIL(RAND() * 300))), +(31, REPEAT('v', CEIL(RAND() * 300))), +(32, REPEAT('w', CEIL(RAND() * 300))), +(33, REPEAT('x', CEIL(RAND() * 300))), +(34, REPEAT('y', CEIL(RAND() * 300))), +(35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; End of 5.1 tests set global storage_engine=MyISAM; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index fa9983690cb..4130831c482 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -939,6 +939,50 @@ create table t1 (a int not null, key key_block_size=1024 (a)); --error 1064 create table t1 (a int not null, key `a` key_block_size=1024 (a)); + +# +# Bug#22119 - Changing MI_KEY_BLOCK_LENGTH makes a wrong myisamchk +# +CREATE TABLE t1 ( + c1 INT, + c2 VARCHAR(300), + KEY (c1) KEY_BLOCK_SIZE 1024, + KEY (c2) KEY_BLOCK_SIZE 8192 + ); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), + (11, REPEAT('b', CEIL(RAND() * 300))), + (12, REPEAT('c', CEIL(RAND() * 300))), + (13, REPEAT('d', CEIL(RAND() * 300))), + (14, REPEAT('e', CEIL(RAND() * 300))), + (15, REPEAT('f', CEIL(RAND() * 300))), + (16, REPEAT('g', CEIL(RAND() * 300))), + (17, REPEAT('h', CEIL(RAND() * 300))), + (18, REPEAT('i', CEIL(RAND() * 300))), + (19, REPEAT('j', CEIL(RAND() * 300))), + (20, REPEAT('k', CEIL(RAND() * 300))), + (21, REPEAT('l', CEIL(RAND() * 300))), + (22, REPEAT('m', CEIL(RAND() * 300))), + (23, REPEAT('n', CEIL(RAND() * 300))), + (24, REPEAT('o', CEIL(RAND() * 300))), + (25, REPEAT('p', CEIL(RAND() * 300))), + (26, REPEAT('q', CEIL(RAND() * 300))), + (27, REPEAT('r', CEIL(RAND() * 300))), + (28, REPEAT('s', CEIL(RAND() * 300))), + (29, REPEAT('t', CEIL(RAND() * 300))), + (30, REPEAT('u', CEIL(RAND() * 300))), + (31, REPEAT('v', CEIL(RAND() * 300))), + (32, REPEAT('w', CEIL(RAND() * 300))), + (33, REPEAT('x', CEIL(RAND() * 300))), + (34, REPEAT('y', CEIL(RAND() * 300))), + (35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +REPAIR TABLE t1; +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +DROP TABLE t1; + --echo End of 5.1 tests eval set global storage_engine=$default; diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 03802ba6989..f997b90a61c 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -228,11 +228,12 @@ static int check_k_link(HA_CHECK *param, register MARIA_HA *info, uint nr) my_off_t next_link; uint block_size=(nr+1)*MARIA_MIN_KEY_BLOCK_LENGTH; ha_rows records; - char llbuff[21],*buff; + char llbuff[21], llbuff2[21], *buff; DBUG_ENTER("check_k_link"); + DBUG_PRINT("enter", ("block_size: %u", block_size)); if (param->testflag & T_VERBOSE) - printf("block_size %4d:",block_size); + printf("block_size %4u:", block_size); /* purecov: tested */ next_link=info->s->state.key_del[nr]; records= (ha_rows) (info->state->key_file_length / block_size); @@ -242,14 +243,46 @@ static int check_k_link(HA_CHECK *param, register MARIA_HA *info, uint nr) DBUG_RETURN(1); if (param->testflag & T_VERBOSE) printf("%16s",llstr(next_link,llbuff)); - if (next_link > info->state->key_file_length || - next_link & (info->s->blocksize-1)) + + /* Key blocks must lay within the key file length entirely. */ + if (next_link + block_size > info->state->key_file_length) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(next_link, llbuff), block_size, + llstr(info->state->key_file_length, llbuff2)); DBUG_RETURN(1); + /* purecov: end */ + } + + /* Key blocks must be aligned at MARIA_MIN_KEY_BLOCK_LENGTH. */ + if (next_link & (MARIA_MIN_KEY_BLOCK_LENGTH - 1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(next_link, llbuff), MARIA_MIN_KEY_BLOCK_LENGTH); + DBUG_RETURN(1); + /* purecov: end */ + } + + /* + Read the key block with MARIA_MIN_KEY_BLOCK_LENGTH to find next link. + If the key cache block size is smaller than block_size, we can so + avoid unecessary eviction of cache block. + */ if (!(buff=key_cache_read(info->s->key_cache, info->s->kfile, next_link, DFLT_INIT_HITS, - (byte*) info->buff, - maria_block_size, block_size, 1))) + (byte*) info->buff, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MIN_KEY_BLOCK_LENGTH, 1))) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "key cache read error for block: %s", + llstr(next_link,llbuff)); DBUG_RETURN(1); + /* purecov: end */ + } next_link=mi_sizekorr(buff); records--; param->key_file_blocks+=block_size; @@ -533,17 +566,37 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo ha_checksum *key_checksum, uint level) { char llbuff[22],llbuff2[22]; - if (page > info->state->key_file_length || (page & (info->s->blocksize -1))) - { - my_off_t max_length=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); - _ma_check_print_error(param,"Wrong pagepointer: %s at page: %s", - llstr(page,llbuff),llstr(page,llbuff2)); + DBUG_ENTER("chk_index_down"); - if (page+info->s->blocksize > max_length) + /* Key blocks must lay within the key file length entirely. */ + if (page + keyinfo->block_length > info->state->key_file_length) + { + /* purecov: begin tested */ + /* Give it a chance to fit in the real file size. */ + my_off_t max_length= my_seek(info->s->kfile, 0L, MY_SEEK_END, MYF(0)); + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(page, llbuff), keyinfo->block_length, + llstr(info->state->key_file_length, llbuff2)); + if (page + keyinfo->block_length > max_length) goto err; - info->state->key_file_length=(max_length & - ~ (my_off_t) (info->s->blocksize-1)); + /* Fix the remebered key file length. */ + info->state->key_file_length= (max_length & + ~ (my_off_t) (keyinfo->block_length - 1)); + /* purecov: end */ } + + /* Key blocks must be aligned at MARIA_MIN_KEY_BLOCK_LENGTH. */ + if (page & (MARIA_MIN_KEY_BLOCK_LENGTH - 1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(page, llbuff), MARIA_MIN_KEY_BLOCK_LENGTH); + goto err; + /* purecov: end */ + } + if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) { _ma_check_print_error(param,"Can't read key from filepos: %s", @@ -554,9 +607,12 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level)) goto err; - return 0; + DBUG_RETURN(0); + + /* purecov: begin tested */ err: - return 1; + DBUG_RETURN(1); + /* purecov: end */ } From fe382a9fe18d4a76ab01bf9d7a98823cd1fc30ff Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 30 Oct 2006 16:58:18 +0100 Subject: [PATCH 50/95] fix dynarray_idxes_in_prev_level[] values --- mysys/lf_dynarray.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index a7a4968ddd8..d63af91813e 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -71,13 +71,15 @@ void lf_dynarray_destroy(LF_DYNARRAY *array) bzero(array, sizeof(*array)); } -static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, - LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * - LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH }; /* From e39aae7ca73da4f8c22a78ee9024abdb18201969 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 8 Nov 2006 23:22:38 +0100 Subject: [PATCH 51/95] Maria: fix for a bug in the transaction's manager and a bug in the unit test of the control file module. storage/maria/trnman.c: fix for a bug (i has to be in [1..SHORT_TRID_MAX]) storage/maria/unittest/ma_control_file-t.c: corrupted checksum is in buffer[0], not in buffer[1] --- storage/maria/trnman.c | 2 +- storage/maria/unittest/ma_control_file-t.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index e18e906a204..1c5281a3449 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -163,7 +163,7 @@ static TrID new_trid() static void set_short_trid(TRN *trn) { - int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX; + int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1; my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn); for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */ { diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 3ea6932c754..beb86843dd3 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -346,13 +346,13 @@ static int test_bad_checksum() MYF(MY_WME))) >= 0); RET_ERR_UNLESS(my_pread(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); buffer[0]+= 3; /* mangle checksum */ - RET_ERR_UNLESS(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_BAD_CHECKSUM); /* Restore checksum */ buffer[0]-= 3; - RET_ERR_UNLESS(my_pwrite(fd, buffer+1, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); return 0; From 96d3604d99f366903c0e3e543858957211071d5a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 9 Nov 2006 16:20:40 +0100 Subject: [PATCH 52/95] lock manager optimized for table locks storage/maria/unittest/lockman1-t.c: New BitKeeper file ``storage/maria/unittest/lockman1-t.c'' storage/maria/tablockman.c: New BitKeeper file ``storage/maria/tablockman.c'' storage/maria/tablockman.h: New BitKeeper file ``storage/maria/tablockman.h'' storage/maria/unittest/lockman2-t.c: New BitKeeper file ``storage/maria/unittest/lockman2-t.c'' --- storage/maria/Makefile.am | 4 +- storage/maria/lockman.c | 55 ++-- storage/maria/tablockman.c | 399 ++++++++++++++++++++++++++++ storage/maria/tablockman.h | 84 ++++++ storage/maria/unittest/Makefile.am | 2 +- storage/maria/unittest/lockman-t.c | 2 +- storage/maria/unittest/lockman1-t.c | 330 +++++++++++++++++++++++ storage/maria/unittest/lockman2-t.c | 318 ++++++++++++++++++++++ 8 files changed, 1165 insertions(+), 29 deletions(-) create mode 100644 storage/maria/tablockman.c create mode 100644 storage/maria/tablockman.h create mode 100644 storage/maria/unittest/lockman1-t.c create mode 100644 storage/maria/unittest/lockman2-t.c diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index ee5f6238b46..24636f139ab 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -53,7 +53,7 @@ maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ - ma_ft_eval.h trnman.h lockman.h \ + ma_ft_eval.h trnman.h lockman.h tablockman.h \ ma_control_file.h ha_maria.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ @@ -108,7 +108,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_keycache.c ma_preload.c ma_ft_parser.c \ ma_ft_update.c ma_ft_boolean_search.c \ ma_ft_nlq_search.c ft_maria.c ma_sort.c \ - ha_maria.cc trnman.c lockman.c \ + ha_maria.cc trnman.c lockman.c tablockman.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ ma_sp_key.c ma_control_file.c CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 937c61021cc..31867d3903d 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -1,10 +1,6 @@ +// TODO - allocate everything from dynarrays !!! (benchmark) // TODO instant duration locks // automatically place S instead of LS if possible -/* - TODO optimization: table locks - they have completely - different characteristics. long lists, few distinct resources - - slow to scan, [possibly] high retry rate -*/ /* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify @@ -68,9 +64,9 @@ it will wait for other locks. Here's an exception to "locks are added to the end" rule - upgraded locks are added after the last active lock but before all waiting locks. Old lock (the one we upgraded from) is - not removed from the list, indeed we may need to return to it later if - the new lock was in a savepoint that gets rolled back. So old lock is - marked as "ignored" (IGNORE_ME flag). New lock gets an UPGRADED flag. + not removed from the list, indeed it may be needed if the new lock was + in a savepoint that gets rolled back. So old lock is marked as "ignored" + (IGNORE_ME flag). New lock gets an UPGRADED flag. Loose locks add an important exception to the above. Loose locks do not always commute with other locks. In the list IX-LS both locks are active, @@ -90,12 +86,12 @@ variable a conflicting lock is returned and the calling thread waits on a pthread condition in the LOCK_OWNER structure of the owner of the conflicting lock. Or a new lock is compatible with all locks, but some - existing locks are not compatible with previous locks (example: request IS, + existing locks are not compatible with each other (example: request IS, when the list is S-IX) - that is not all locks are active. In this case a - first waiting lock is returned in the 'blocker' variable, - lockman_getlock() notices that a "blocker" does not conflict with the - requested lock, and "dereferences" it, to find the lock that it's waiting - on. The calling thread than begins to wait on the same lock. + first waiting lock is returned in the 'blocker' variable, lockman_getlock() + notices that a "blocker" does not conflict with the requested lock, and + "dereferences" it, to find the lock that it's waiting on. The calling + thread than begins to wait on the same lock. To better support table-row relations where one needs to lock the table with an intention lock before locking the row, extended diagnostics is @@ -107,6 +103,10 @@ whether it's possible to lock the row, but no need to lock it - perhaps the thread has a loose lock on this table). This is defined by getlock_result[] table. + + TODO optimization: table locks - they have completely + different characteristics. long lists, few distinct resources - + slow to scan, [possibly] high retry rate */ #include @@ -316,7 +316,7 @@ retry: DBUG_ASSERT(prev_active == TRUE); else cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; - if (upgrading && !cur_active) + if (upgrading && !cur_active /*&& !(cur_flags & UPGRADED)*/) break; if (prev_active && !cur_active) { @@ -327,7 +327,7 @@ retry: { /* we already have a lock on this resource */ DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N); - DBUG_ASSERT(!upgrading); /* can happen only once */ + DBUG_ASSERT(!upgrading || (flags & IGNORE_ME)); if (lock_combining_matrix[cur_lock][lock] == cur_lock) { /* new lock is compatible */ @@ -380,7 +380,7 @@ retry: */ if (upgrading) { - if (compatible) + if (compatible /*&& prev_active*/) return PLACE_NEW_DISABLE_OLD; else return REQUEST_NEW_DISABLE_OLD; @@ -431,6 +431,9 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, } if (res & LOCK_UPGRADE) cursor.upgrade_from->flags|= IGNORE_ME; +#warning is this OK ? if a reader has already read upgrade_from, \ + it may find it conflicting with node :( +//#error another bug - see the last test from test_lockman_simple() } } while (res == REPEAT_ONCE_MORE); @@ -439,8 +442,8 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, _lf_unpin(pins, 2); /* note that blocker is not necessarily pinned here (when it's == curr). - this is ok as it's either a dummy node then for initialize_bucket - and dummy nodes don't need pinning, + this is ok as in such a case it's either a dummy node for + initialize_bucket() and dummy nodes don't need pinning, or it's a lock of the same transaction for lockman_getlock, and it cannot be removed by another thread */ @@ -484,9 +487,15 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) res= lockfind(head, node, &cursor, pins); DBUG_ASSERT(res & ALREADY_HAVE); - if (cursor.upgrade_from) - cursor.upgrade_from->flags&= ~IGNORE_ME; - + /* + XXX this does not work with savepoints, as old lock is left ignored. + It cannot be unignored, as would basically mean moving the lock back + in the lock chain (from upgraded). And the latter is not allowed - + because it breaks list scanning. So old ignored lock must be deleted, + new - same - lock must be installed right after the lock we're deleting, + then we can delete. Good news is - this is only required when rolling + back a savepoint. + */ if (my_atomic_casptr((void **)&(cursor.curr->link), (void **)&cursor.next, 1+(char *)cursor.next)) { @@ -497,11 +506,7 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) lockfind(head, node, &cursor, pins); } else - { res= REPEAT_ONCE_MORE; - if (cursor.upgrade_from) /* to satisfy the assert in lockfind */ - cursor.upgrade_from->flags|= IGNORE_ME; - } } while (res == REPEAT_ONCE_MORE); _lf_unpin(pins, 0); _lf_unpin(pins, 1); diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c new file mode 100644 index 00000000000..d04bbbab0e4 --- /dev/null +++ b/storage/maria/tablockman.c @@ -0,0 +1,399 @@ +// TODO - allocate everything from dynarrays !!! (benchmark) +// TODO instant duration locks +// automatically place S instead of LS if possible +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include +#include +#include +#include +#include "tablockman.h" + +/* + Lock compatibility matrix. + + It's asymmetric. Read it as "Somebody has the lock , can I set the lock ?" + + ') Though you can take LS lock while somebody has S lock, it makes no + sense - it's simpler to take S lock too. + + 1 - compatible + 0 - incompatible + -1 - "impossible", so that we can assert the impossibility. +*/ +static int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ + { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { X, X, X, X, X, X, X, X, X, X}, /* X */ + { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock , + what value should be returned for ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. + I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +/* + this structure is optimized for a case when there're many locks + on the same resource - e.g. a table +*/ + +struct st_table_lock { + struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; + struct st_locked_table *table; + uint16 loid; + char lock_type; +}; + +#define hash_insert my_hash_insert /* for consistency :) */ + +enum lockman_getlock_result +tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, enum lock_type lock) +{ + TABLE_LOCK *old, *new, *blocker; + TABLE_LOCK_OWNER *wait_for; + int i; + ulonglong deadline; + struct timespec timeout; + enum lock_type new_lock; + + pthread_mutex_lock(& table->mutex); + old= (TABLE_LOCK *)hash_search(& table->active, (byte *)&lo->loid, + sizeof(lo->loid)); + + /* perhaps we have the lock already ? */ + if (old && lock_combining_matrix[old->lock_type][lock] == old->lock_type) + { + pthread_mutex_unlock(& table->mutex); + return getlock_result[old->lock_type][lock]; + } + + pthread_mutex_lock(& lm->pool_mutex); + new= lm->pool; + if (new) + { + lm->pool= new->next; + pthread_mutex_unlock(& lm->pool_mutex); + } + else + { + pthread_mutex_unlock(& lm->pool_mutex); + new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME)); + if (!new) + { + pthread_mutex_unlock(& table->mutex); + return DIDNT_GET_THE_LOCK; + } + } + + /* calculate required upgraded lock type */ + new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; + + new->loid= lo->loid; + new->lock_type= new_lock; + new->table= table; + + for (new->next= table->wait_queue_in ; ; ) + { + if (!old && new->next) + { + /* need to wait */ + DBUG_ASSERT(table->wait_queue_out); + DBUG_ASSERT(table->wait_queue_in); + blocker= new->next; + if (lock_compatibility_matrix[blocker->lock_type][lock]) + wait_for= lm->loid_to_lo(blocker->loid)->waiting_for; + else + wait_for= lm->loid_to_lo(blocker->loid); + } + else + { + /* checking for compatibility with existing locks */ + for (blocker= 0, i= 0; i < LOCK_TYPES; i++) + { + if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) + { + for (blocker= table->active_locks[i]; + blocker && blocker->loid == lo->loid; + blocker= blocker->next); + if (blocker) + break; + } + } + if (!blocker) + break; + wait_for= lm->loid_to_lo(blocker->loid); + } + + lo->waiting_for= wait_for; + if (!lo->waiting_lock) /* first iteration */ + { + /* lock upgrade or new lock request ? */ + if (old) + { + new->prev= 0; + if ((new->next= table->wait_queue_out)) + new->next->prev= new; + table->wait_queue_out= new; + if (!table->wait_queue_in) + table->wait_queue_in=table->wait_queue_out; + } + else + { + new->next= 0; + if ((new->prev= table->wait_queue_in)) + new->prev->next= new; + table->wait_queue_in= new; + if (!table->wait_queue_out) + table->wait_queue_out=table->wait_queue_in; + } + + lo->waiting_lock= new; + + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + } + else + { + if (my_getsystime() > deadline) + { + pthread_mutex_unlock(& table->mutex); + return DIDNT_GET_THE_LOCK; + } + } + + pthread_mutex_lock(wait_for->mutex); + pthread_mutex_unlock(& table->mutex); + + pthread_cond_timedwait(wait_for->cond, wait_for->mutex, &timeout); + + pthread_mutex_unlock(wait_for->mutex); + pthread_mutex_lock(& table->mutex); + } + + if (lo->waiting_lock) + { + if (new->prev) + new->prev->next= new->next; + if (new->next) + new->next->prev= new->prev; + if (table->wait_queue_in == new) + table->wait_queue_in= new->prev; + if (table->wait_queue_out == new) + table->wait_queue_out= new->next; + + lo->waiting_lock= 0; + } + + new->next_in_lo= lo->active_locks; + lo->active_locks= new; + + new->prev= 0; + if ((new->next= table->active_locks[new_lock-1])) + new->next->prev= new; + table->active_locks[new_lock-1]= new; + + /* placing the lock */ + hash_insert(& table->active, (byte *)new); + if (old) + { + new->upgraded_from= old; + hash_delete(& table->active, (byte *)old); + } + else + new->upgraded_from= 0; + + pthread_mutex_unlock(& table->mutex); + return getlock_result[lock][lock]; +} + +void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock, *tmp, *local_pool= 0, *local_pool_end; + + local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks; + if (!local_pool_end) + return; + + if ((lock= lo->waiting_lock)) + { + pthread_mutex_lock(& lock->table->mutex); + + if (lock->prev) + lock->prev->next= lock->next; + if (lock->next) + lock->next->prev= lock->prev; + if (lock->table->wait_queue_in == lock) + lock->table->wait_queue_in= lock->prev; + if (lock->table->wait_queue_out == lock) + lock->table->wait_queue_out= lock->next; + pthread_mutex_unlock(& lock->table->mutex); + + lock->next= local_pool; + local_pool= lock; + DBUG_ASSERT(lock->loid == lo->loid); + } + + lock= lo->active_locks; + while (lock) + { + TABLE_LOCK *cur= lock; + pthread_mutex_t *mutex= & lock->table->mutex; + + lock= lock->next_in_lo; + + pthread_mutex_lock(mutex); + hash_delete(& cur->table->active, (byte *)cur); + + if (cur->prev) + cur->prev->next= cur->next; + if (cur->next) + cur->next->prev= cur->prev; + if (cur->table->active_locks[cur->lock_type-1] == cur) + cur->table->active_locks[cur->lock_type-1]= cur->next; + + cur->next= local_pool; + local_pool= cur; + DBUG_ASSERT(cur->loid == lo->loid); + + pthread_mutex_unlock(mutex); + } + + lo->waiting_lock= lo->active_locks= 0; + + pthread_mutex_lock(lo->mutex); + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + + pthread_mutex_lock(& lm->pool_mutex); + local_pool_end->next= lm->pool; + lm->pool= local_pool; + pthread_mutex_unlock(& lm->pool_mutex); +} + +void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) +{ + lm->pool= 0; + lm->loid_to_lo= func; + lm->lock_timeout= timeout; + pthread_mutex_init(&lm->pool_mutex, MY_MUTEX_INIT_FAST); +} + +void tablockman_destroy(TABLOCKMAN *lm) +{ + while (lm->pool) + { + TABLE_LOCK *tmp= lm->pool; + lm->pool= tmp->next; + my_free((void *)tmp, MYF(0)); + } + pthread_mutex_destroy(&lm->pool_mutex); +} + +void tablockman_init_locked_table(LOCKED_TABLE *lt) +{ + TABLE_LOCK *unused; + bzero(lt, sizeof(*lt)); + pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); + hash_init(& lt->active, &my_charset_bin, 10/*FIXME*/, + offsetof(TABLE_LOCK, loid), sizeof(unused->loid), 0, 0, 0); +} + +void tablockman_destroy_locked_table(LOCKED_TABLE *lt) +{ + hash_free(& lt->active); + pthread_mutex_destroy(& lt->mutex); +} + +static char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", + "LS", "LX", "SLX", "LSIX"}; + +void print_tlo(TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock; + printf("lo%d>", lo->loid); + if ((lock= lo->waiting_lock)) + printf(" (%s.%p)", lock2str[lock->lock_type], lock->table); + for (lock= lo->active_locks; lock && lock != lock->next_in_lo; lock= lock->next_in_lo) + printf(" %s.%p", lock2str[lock->lock_type], lock->table); + if (lock && lock == lock->next_in_lo) + printf("!"); + printf("\n"); +} + diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h new file mode 100644 index 00000000000..4b2d165af54 --- /dev/null +++ b/storage/maria/tablockman.h @@ -0,0 +1,84 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _tablockman_h +#define _tablockman_h + +/* + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +#ifndef _lockman_h +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; +enum lockman_getlock_result { + DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; + +#endif + +#define LOCK_TYPES LSIX + +typedef struct st_table_lock_owner TABLE_LOCK_OWNER; +typedef struct st_table_lock TABLE_LOCK; +typedef struct st_locked_table LOCKED_TABLE; +typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); + +typedef struct { + pthread_mutex_t pool_mutex; + TABLE_LOCK *pool; + uint lock_timeout; + loid_to_tlo_func *loid_to_lo; +} TABLOCKMAN; + +struct st_table_lock_owner { + TABLE_LOCK *active_locks, *waiting_lock; + TABLE_LOCK_OWNER *waiting_for; + pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid; +}; + +struct st_locked_table { + pthread_mutex_t mutex; + HASH active; // fast to remove + TABLE_LOCK *active_locks[LOCK_TYPES]; // fast to see a conflict + TABLE_LOCK *wait_queue_in, *wait_queue_out; +}; + +void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); +void tablockman_destroy(TABLOCKMAN *); +enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, + LOCKED_TABLE *, + enum lock_type lock); +void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *); +void tablockman_init_locked_table(LOCKED_TABLE *); +void print_tlo(TABLE_LOCK_OWNER *); + +#endif + diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index e29bc7f86cb..d0b247d65e1 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -25,5 +25,5 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t +noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t lockman2-t CLEANFILES = maria_control diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index 7b37c983864..94b034bdaad 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -268,7 +268,7 @@ int main() test_lockman_simple(); -#define CYCLES 1000 +#define CYCLES 10000 #define THREADS Nlos /* don't change this line */ /* mixed load, stress-test with random locks */ diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c new file mode 100644 index 00000000000..c9a4ff98f2a --- /dev/null +++ b/storage/maria/unittest/lockman1-t.c @@ -0,0 +1,330 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +//#define EXTRA_VERBOSE + +#include + +#include +#include +#include +#include +#include "../lockman.h" +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 10 +LOCK_OWNER loarray[Nlos]; +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +LOCKMAN lockman; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[4]= { + "DIDN'T GET THE LOCK", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; TABLE_LOCK_OWNER *lo1; DBUG_ASSERT(Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case DIDNT_GET_THE_LOCK: + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + DBUG_ASSERT(0); + } + } + } + + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + lockman_init(&lockman, &loid2lo, 50); + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i); + } + + //test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c new file mode 100644 index 00000000000..72b3993e4b4 --- /dev/null +++ b/storage/maria/unittest/lockman2-t.c @@ -0,0 +1,318 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +//#define EXTRA_VERBOSE + +#include + +#include +#include +#include +#include +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 110 +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[4]= { + "DIDN'T GET THE LOCK", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + TABLE_LOCK_OWNER *lo1; + DBUG_ASSERT(Ntables <= Ntbls); + DBUG_ASSERT(Nrows + Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case DIDNT_GET_THE_LOCK: + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + DBUG_ASSERT(0); + } + } + } + + tablockman_release_locks(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i); + } + + test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); +#if 0 + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); +#endif + for (i= 0; i < Nlos; i++) + { + tablockman_release_locks(&tablockman, &loarray1[i]); + pthread_mutex_destroy(loarray1[i].mutex); + pthread_cond_destroy(loarray1[i].cond); + } + + { + ulonglong now= my_getsystime(); + for (i= 0; i < Ntbls; i++) + { + tablockman_destroy_locked_table(ltarray+i); + } + tablockman_destroy(&tablockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + From aad040fde0bfdbfbf2876dba8d7034333493fbf2 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 10 Nov 2006 10:56:05 +0100 Subject: [PATCH 53/95] comments storage/maria/tablockman.c: comments. bugfix - a special case in release_locks storage/maria/unittest/lockman1-t.c: updated storage/maria/unittest/lockman2-t.c: new tests --- storage/maria/tablockman.c | 238 +++++++++++++++++++++++----- storage/maria/tablockman.h | 32 ++-- storage/maria/unittest/lockman1-t.c | 4 +- storage/maria/unittest/lockman2-t.c | 18 ++- 4 files changed, 233 insertions(+), 59 deletions(-) diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c index d04bbbab0e4..608f852cb7b 100644 --- a/storage/maria/tablockman.c +++ b/storage/maria/tablockman.c @@ -1,5 +1,4 @@ // TODO - allocate everything from dynarrays !!! (benchmark) -// TODO instant duration locks // automatically place S instead of LS if possible /* Copyright (C) 2006 MySQL AB @@ -23,6 +22,93 @@ #include #include "tablockman.h" +/* + Lock Manager for Table Locks + + The code below handles locks on resources - but it is optimized for a + case when a number of resources is not very large, and there are many of + locks per resource - that is a resource is likely to be a table or a + database, but hardly a row in a table. + + Locks belong to "lock owners". A Lock Owner is uniquely identified by a + 16-bit number - loid (lock owner identifier). A function loid_to_tlo must + be provided by the application that takes such a number as an argument + and returns a TABLE_LOCK_OWNER structure. + + Lock levels are completely defined by three tables. Lock compatibility + matrix specifies which locks can be held at the same time on a resource. + Lock combining matrix specifies what lock level has the same behaviour as + a pair of two locks of given levels. getlock_result matrix simplifies + intention locking and lock escalation for an application, basically it + defines which locks are intention locks and which locks are "loose" + locks. It is only used to provide better diagnostics for the + application, lock manager itself does not differentiate between normal, + intention, and loose locks. + + The assumptions are: few distinct resources, many locks are held at the + same time on one resource. Thus: a lock structure _per resource_ can be + rather large; a lock structure _per lock_ does not need to be very small + either; we need to optimize for _speed_. Operations we need are: place a + lock, check if a particular transaction already has a lock on this + resource, check if a conflicting lock exists, if yes - find who owns it. + + Solution: every resource has a structure with + 1. Hash of "active" (see below for the description of "active") granted + locks with loid as a key. Thus, checking if a given transaction has a + lock on this resource is O(1) operation. + 2. Doubly-linked lists of all granted locks - one list for every lock + type. Thus, checking if a conflicting lock exists is a check whether + an appropriate list head pointer is not null, also O(1). + 3. Every lock has a loid of the owner, thus checking who owns a + conflicting lock is also O(1). + 4. Deque of waiting locks. It's a deque not a fifo, because for lock + upgrades requests are added to the queue head, not tail. There's never + a need to scan the queue. + + Result: adding or removing a lock is always a O(1) operation, it does not + depend on the number of locks on the resource, or number of transactions, + or number of resources. It _does_ depend on the number of different lock + levels - O(number_of_lock_levels) - but it's a constant. + + Waiting: if there is a conflicting lock or if wait queue is not empty, a + requested lock cannot be granted at once. It is added to the end of the + wait queue. If there is a conflicting lock - the "blocker" transaction is + the owner of this lock. If there's no conflict but a queue was not empty, + than the "blocker" is the transaction that the owner of the lock at the + end of the queue is waiting for (in other words, our lock is added to the + end of the wait queue, and our blocker is the same as of the lock right + before us). + + Lock upgrades: when a thread that has a lock on a given resource, + requests a new lock on the same resource and the old lock is not enough + to satisfy new lock requirements (which is defined by + lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock + (defineded by lock_combining_matrix as above) is placed. Depending on + other granted locks it is immediately active or it has to wait. Here the + lock is added to the start of the waiting queue, not to the end. Old + lock, is removed from the hash, but not from the doubly-linked lists. + (indeed, a transaction checks "do I have a lock on this resource ?" by + looking in a hash, and it should find a latest lock, so old locks must be + removed; but a transaction checks "are the conflicting locks ?" by + checking doubly-linked lists, it doesn't matter if it will find an old + lock - if it would be removed, a new lock would be also a conflict). + + To better support table-row relations where one needs to lock the table + with an intention lock before locking the row, extended diagnostics is + provided. When an intention lock (presumably on a table) is granted, + lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row, + perhaps the thread already has a normal lock on this table), + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual), + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check + whether it's possible to lock the row, but no need to lock it - perhaps + the thread has a loose lock on this table). This is defined by + getlock_result[] table. + + Instant duration locks are not supported. Though they're trivial to add, + they are normally only used on rows, not on tables. So, presumably, + they are not needed here. +*/ + /* Lock compatibility matrix. @@ -121,28 +207,63 @@ struct st_table_lock { }; #define hash_insert my_hash_insert /* for consistency :) */ +#define remove_from_wait_queue(LOCK, TABLE) \ + do \ + { \ + if ((LOCK)->prev) \ + { \ + DBUG_ASSERT((TABLE)->wait_queue_out != (LOCK)); \ + (LOCK)->prev->next= (LOCK)->next; \ + } \ + else \ + { \ + DBUG_ASSERT((TABLE)->wait_queue_out == (LOCK)); \ + (TABLE)->wait_queue_out= (LOCK)->next; \ + } \ + if ((LOCK)->next) \ + { \ + DBUG_ASSERT((TABLE)->wait_queue_in != (LOCK)); \ + (LOCK)->next->prev= (LOCK)->prev; \ + } \ + else \ + { \ + DBUG_ASSERT((TABLE)->wait_queue_in == (LOCK)); \ + (TABLE)->wait_queue_in= (LOCK)->prev; \ + } \ + } while (0) +/* + DESCRIPTION + tries to lock a resource 'table' with a lock level 'lock'. + + RETURN + see enum lockman_getlock_result +*/ enum lockman_getlock_result -tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, enum lock_type lock) +tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, + LOCKED_TABLE *table, enum lock_type lock) { TABLE_LOCK *old, *new, *blocker; TABLE_LOCK_OWNER *wait_for; - int i; ulonglong deadline; struct timespec timeout; enum lock_type new_lock; + int i; pthread_mutex_lock(& table->mutex); + /* do we alreasy have a lock on this resource ? */ old= (TABLE_LOCK *)hash_search(& table->active, (byte *)&lo->loid, sizeof(lo->loid)); - /* perhaps we have the lock already ? */ + /* and if yes, is it enough to satisfy the new request */ if (old && lock_combining_matrix[old->lock_type][lock] == old->lock_type) { + /* yes */ pthread_mutex_unlock(& table->mutex); return getlock_result[old->lock_type][lock]; } + /* no, placing a new lock. first - take a free lock structure from the pool */ pthread_mutex_lock(& lm->pool_mutex); new= lm->pool; if (new) @@ -161,25 +282,28 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en } } - /* calculate required upgraded lock type */ + /* calculate the level of the upgraded lock */ new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; new->loid= lo->loid; new->lock_type= new_lock; new->table= table; - for (new->next= table->wait_queue_in ; ; ) + /* and try to place it */ + for (new->prev= table->wait_queue_in ; ; ) { - if (!old && new->next) + /* waiting queue is not empty and we're not upgrading */ + if (!old && new->prev) { /* need to wait */ DBUG_ASSERT(table->wait_queue_out); DBUG_ASSERT(table->wait_queue_in); - blocker= new->next; + blocker= new->prev; + /* wait for a previous lock in the queue or for a lock it's waiting for */ if (lock_compatibility_matrix[blocker->lock_type][lock]) - wait_for= lm->loid_to_lo(blocker->loid)->waiting_for; + wait_for= lm->loid_to_tlo(blocker->loid)->waiting_for; else - wait_for= lm->loid_to_lo(blocker->loid); + wait_for= lm->loid_to_tlo(blocker->loid); } else { @@ -188,24 +312,27 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en { if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) { + /* the first lock in the list may be our own - skip it */ for (blocker= table->active_locks[i]; blocker && blocker->loid == lo->loid; - blocker= blocker->next); + blocker= blocker->next) /* no-op */; if (blocker) break; } } - if (!blocker) + if (!blocker) /* free to go */ break; - wait_for= lm->loid_to_lo(blocker->loid); + wait_for= lm->loid_to_tlo(blocker->loid); } + /* ok, we're here - the wait is inevitable */ lo->waiting_for= wait_for; - if (!lo->waiting_lock) /* first iteration */ + if (!lo->waiting_lock) /* first iteration of the for() loop */ { /* lock upgrade or new lock request ? */ if (old) { + /* upgrade - add the lock to the _start_ of the wait queue */ new->prev= 0; if ((new->next= table->wait_queue_out)) new->next->prev= new; @@ -215,6 +342,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en } else { + /* new lock - add the lock to the _end_ of the wait queue */ new->next= 0; if ((new->prev= table->wait_queue_in)) new->prev->next= new; @@ -222,7 +350,6 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en if (!table->wait_queue_out) table->wait_queue_out=table->wait_queue_in; } - lo->waiting_lock= new; deadline= my_getsystime() + lm->lock_timeout * 10000; @@ -238,6 +365,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en } } + /* now really wait */ pthread_mutex_lock(wait_for->mutex); pthread_mutex_unlock(& table->mutex); @@ -245,32 +373,30 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en pthread_mutex_unlock(wait_for->mutex); pthread_mutex_lock(& table->mutex); - } + /* ... and repeat from the beginning */ + } + /* yeah! we can place the lock now */ + + /* remove the lock from the wait queue, if it was there */ if (lo->waiting_lock) { - if (new->prev) - new->prev->next= new->next; - if (new->next) - new->next->prev= new->prev; - if (table->wait_queue_in == new) - table->wait_queue_in= new->prev; - if (table->wait_queue_out == new) - table->wait_queue_out= new->next; - + remove_from_wait_queue(new, table); lo->waiting_lock= 0; + lo->waiting_for= 0; } + /* add it to the list of all locks of this lock owner */ new->next_in_lo= lo->active_locks; lo->active_locks= new; + /* and to the list of active locks of this lock type */ new->prev= 0; if ((new->next= table->active_locks[new_lock-1])) new->next->prev= new; table->active_locks[new_lock-1]= new; - /* placing the lock */ - hash_insert(& table->active, (byte *)new); + /* remove the old lock from the hash, if upgrading */ if (old) { new->upgraded_from= old; @@ -279,45 +405,69 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, en else new->upgraded_from= 0; + /* and add a new lock to the hash, voila */ + hash_insert(& table->active, (byte *)new); + pthread_mutex_unlock(& table->mutex); return getlock_result[lock][lock]; } +/* + DESCRIPTION + release all locks belonging to a transaction. + signal waiters to continue +*/ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) { - TABLE_LOCK *lock, *tmp, *local_pool= 0, *local_pool_end; + TABLE_LOCK *lock, *local_pool= 0, *local_pool_end; + /* + instead of adding released locks to a pool one by one, we'll link + them in a list and add to a pool in one short action (under a mutex) + */ local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks; if (!local_pool_end) return; + /* release a waiting lock, if any */ if ((lock= lo->waiting_lock)) { + DBUG_ASSERT(lock->loid == lo->loid); pthread_mutex_lock(& lock->table->mutex); + remove_from_wait_queue(lock, lock->table); - if (lock->prev) - lock->prev->next= lock->next; - if (lock->next) - lock->next->prev= lock->prev; - if (lock->table->wait_queue_in == lock) - lock->table->wait_queue_in= lock->prev; - if (lock->table->wait_queue_out == lock) - lock->table->wait_queue_out= lock->next; + /* + a special case: if this lock was not the last in the wait queue + and it's compatible with the next lock, than the next lock + is waiting for our blocker though really it waits for us, indirectly. + Signal our blocker to release this next lock (after we removed our + lock from the wait queue, of course). + */ + if (lock->prev && + lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type]) + { + pthread_mutex_lock(lo->waiting_for->mutex); + pthread_cond_broadcast(lo->waiting_for->cond); + pthread_mutex_unlock(lo->waiting_for->mutex); + } + lo->waiting_for= 0; pthread_mutex_unlock(& lock->table->mutex); lock->next= local_pool; local_pool= lock; - DBUG_ASSERT(lock->loid == lo->loid); } + /* now release granted locks */ lock= lo->active_locks; while (lock) { TABLE_LOCK *cur= lock; pthread_mutex_t *mutex= & lock->table->mutex; + DBUG_ASSERT(cur->loid == lo->loid); lock= lock->next_in_lo; + /* TODO ? group locks by table to reduce the number of mutex locks */ pthread_mutex_lock(mutex); hash_delete(& cur->table->active, (byte *)cur); @@ -330,17 +480,21 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) cur->next= local_pool; local_pool= cur; - DBUG_ASSERT(cur->loid == lo->loid); pthread_mutex_unlock(mutex); } lo->waiting_lock= lo->active_locks= 0; + /* + okay, all locks released. now signal that we're leaving, + in case somebody's waiting for it + */ pthread_mutex_lock(lo->mutex); pthread_cond_broadcast(lo->cond); pthread_mutex_unlock(lo->mutex); + /* and push all freed locks to the lockman's pool */ pthread_mutex_lock(& lm->pool_mutex); local_pool_end->next= lm->pool; lm->pool= local_pool; @@ -350,7 +504,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) { lm->pool= 0; - lm->loid_to_lo= func; + lm->loid_to_tlo= func; lm->lock_timeout= timeout; pthread_mutex_init(&lm->pool_mutex, MY_MUTEX_INIT_FAST); } @@ -366,12 +520,12 @@ void tablockman_destroy(TABLOCKMAN *lm) pthread_mutex_destroy(&lm->pool_mutex); } -void tablockman_init_locked_table(LOCKED_TABLE *lt) +void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size) { TABLE_LOCK *unused; bzero(lt, sizeof(*lt)); pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); - hash_init(& lt->active, &my_charset_bin, 10/*FIXME*/, + hash_init(& lt->active, &my_charset_bin, initial_hash_size, offsetof(TABLE_LOCK, loid), sizeof(unused->loid), 0, 0, 0); } @@ -381,6 +535,7 @@ void tablockman_destroy_locked_table(LOCKED_TABLE *lt) pthread_mutex_destroy(& lt->mutex); } +#ifdef EXTRA_DEBUG static char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX"}; @@ -396,4 +551,5 @@ void print_tlo(TABLE_LOCK_OWNER *lo) printf("!"); printf("\n"); } +#endif diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h index 4b2d165af54..d307b2c844f 100644 --- a/storage/maria/tablockman.h +++ b/storage/maria/tablockman.h @@ -51,34 +51,38 @@ typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); typedef struct { pthread_mutex_t pool_mutex; - TABLE_LOCK *pool; + TABLE_LOCK *pool; /* lifo pool of free locks */ uint lock_timeout; - loid_to_tlo_func *loid_to_lo; + loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ } TABLOCKMAN; struct st_table_lock_owner { - TABLE_LOCK *active_locks, *waiting_lock; - TABLE_LOCK_OWNER *waiting_for; - pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ - pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ - uint16 loid; + TABLE_LOCK *active_locks; /* list of active locks */ + TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */ + TABLE_LOCK_OWNER *waiting_for; /* transaction we're wating for */ + pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid; /* Lock Owner IDentifier */ }; struct st_locked_table { - pthread_mutex_t mutex; - HASH active; // fast to remove - TABLE_LOCK *active_locks[LOCK_TYPES]; // fast to see a conflict - TABLE_LOCK *wait_queue_in, *wait_queue_out; + pthread_mutex_t mutex; /* mutex for everything below */ + HASH active; /* active locks ina hash */ + TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */ + TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque */ }; void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); void tablockman_destroy(TABLOCKMAN *); enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, - LOCKED_TABLE *, - enum lock_type lock); + LOCKED_TABLE *, enum lock_type); void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *); -void tablockman_init_locked_table(LOCKED_TABLE *); +void tablockman_init_locked_table(LOCKED_TABLE *, int); +void tablockman_destroy_locked_table(LOCKED_TABLE *); + +#ifdef EXTRA_DEBUG void print_tlo(TABLE_LOCK_OWNER *); +#endif #endif diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c index c9a4ff98f2a..ab4b7beba9f 100644 --- a/storage/maria/unittest/lockman1-t.c +++ b/storage/maria/unittest/lockman1-t.c @@ -288,10 +288,10 @@ int main() for (i= 0; i < Ntbls; i++) { - tablockman_init_locked_table(ltarray+i); + tablockman_init_locked_table(ltarray+i, Nlos); } - //test_tablockman_simple(); + test_tablockman_simple(); #define CYCLES 10000 #define THREADS Nlos /* don't change this line */ diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c index 72b3993e4b4..bde0766c885 100644 --- a/storage/maria/unittest/lockman2-t.c +++ b/storage/maria/unittest/lockman2-t.c @@ -115,6 +115,20 @@ void test_tablockman_simple() lock_conflict(2, 1, X); unlock_all(1); unlock_all(2); + + lock_ok_i(1, 1, IS); + lock_conflict(2, 1, X); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_a(1, 1, S); + lock_conflict(2, 1, IX); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); } int rt_num_threads; @@ -273,7 +287,7 @@ int main() for (i= 0; i < Ntbls; i++) { - tablockman_init_locked_table(ltarray+i); + tablockman_init_locked_table(ltarray+i, Nlos); } test_tablockman_simple(); @@ -285,7 +299,7 @@ int main() Nrows= 100; Ntables= 10; table_lock_ratio= 10; - run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + //run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); #if 0 /* "real-life" simulation - many rows, no table locks */ Nrows= 1000000; From f6edbbc85f0566cc6f79240529d42b2a9d9f7add Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 10 Nov 2006 16:18:10 +0100 Subject: [PATCH 54/95] Maria: importing change made to MyISAM's mi_test_all, into Maria's ma_test_all This makes an expected warning message about the index file's size, go away, as intended. storage/maria/ma_test_all.sh: importing change made to MyISAM's mi_test_all, into Maria's ma_test_all This makes an expected warning message about the index file's size, go away, as intended. --- storage/maria/ma_test_all.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index ec4d1778db9..73234d93ae4 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -79,7 +79,8 @@ if test -f ma_test1$MACH ; then suffix=$MACH ; else suffix=""; fi # check of maria_pack / maria_chk ./maria_pack$suffix --force -s test1 -./maria_chk$suffix -es test1 +# Ignore error for index file +./maria_chk$suffix -es test1 2>&1 >& /dev/null ./maria_chk$suffix -rqs test1 ./maria_chk$suffix -es test1 ./maria_chk$suffix -rs test1 From 41f890bc98fe26128ab18737e4acc846db09b74f Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 12 Nov 2006 14:44:12 +0100 Subject: [PATCH 55/95] minor unittest fixes storage/maria/lockman.c: restore removed lines storage/maria/unittest/lockman2-t.c: fix the plan --- storage/maria/lockman.c | 11 +++++++---- storage/maria/unittest/lockman2-t.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 31867d3903d..7a6b97b3d51 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -103,10 +103,6 @@ whether it's possible to lock the row, but no need to lock it - perhaps the thread has a loose lock on this table). This is defined by getlock_result[] table. - - TODO optimization: table locks - they have completely - different characteristics. long lists, few distinct resources - - slow to scan, [possibly] high retry rate */ #include @@ -487,6 +483,9 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) res= lockfind(head, node, &cursor, pins); DBUG_ASSERT(res & ALREADY_HAVE); + if (cursor.upgrade_from) + cursor.upgrade_from->flags&= ~IGNORE_ME; + /* XXX this does not work with savepoints, as old lock is left ignored. It cannot be unignored, as would basically mean moving the lock back @@ -506,7 +505,11 @@ static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) lockfind(head, node, &cursor, pins); } else + { res= REPEAT_ONCE_MORE; + if (cursor.upgrade_from) + cursor.upgrade_from->flags|= IGNORE_ME; + } } while (res == REPEAT_ONCE_MORE); _lf_unpin(pins, 0); _lf_unpin(pins, 1); diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c index bde0766c885..c515fc7ecd7 100644 --- a/storage/maria/unittest/lockman2-t.c +++ b/storage/maria/unittest/lockman2-t.c @@ -264,7 +264,7 @@ int main() my_init(); pthread_mutex_init(&rt_mutex, 0); - plan(35); + plan(39); if (my_atomic_initialize()) return exit_status(); From e5858e1fb429617bf16f94031b660a3694e2acdd Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 13 Nov 2006 00:26:29 +0200 Subject: [PATCH 56/95] postreview fixes fixed bug in the pagecache which lead to assertion in multithread test include/pagecache.h: post review fixes: - comments fixed - types fixed - keyword 'extern' added to all interface functions mysys/mf_pagecache.c: postreview fixes: - comments fixed and added - types fixed - typo fixed Added write locking flag set when we took a block from LRU and going to free it (to prevent locking/using it for write) mysys/test_pagecache_consist.c: pagecache size reduced (to be able reproduce problems found by Guilhem) typo in the comment fixed --- include/pagecache.h | 50 ++++----- mysys/mf_pagecache.c | 190 ++++++++++++++++++++++++--------- mysys/test_pagecache_consist.c | 4 +- 3 files changed, 169 insertions(+), 75 deletions(-) diff --git a/include/pagecache.h b/include/pagecache.h index 018e695fa27..4d64070ad62 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -119,14 +119,14 @@ typedef struct st_pagecache ulonglong time; /* total number of block link operations */ uint hash_entries; /* max number of entries in the hash table */ int hash_links; /* max number of hash links */ - int hash_links_used; /* number of hash links currently used */ + int hash_links_used; /* number of hash links taken from free links pool */ int disk_blocks; /* max number of blocks in the cache */ ulong blocks_used; /* maximum number of concurrently used blocks */ ulong blocks_unused; /* number of currently unused blocks */ ulong blocks_changed; /* number of currently dirty blocks */ ulong warm_blocks; /* number of blocks in warm sub-chain */ ulong cnt_for_resize_op; /* counter to block resize operation */ - long blocks_available; /* number of blocks available in the LRU chain */ + ulong blocks_available; /* number of blocks available in the LRU chain */ PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */ PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */ PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */ @@ -194,32 +194,32 @@ extern my_bool pagecache_write(PAGECACHE *pagecache, enum pagecache_page_pin pin, enum pagecache_write_mode write_mode, PAGECACHE_PAGE_LINK *link); -void pagecache_unlock_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN first_REDO_LSN_for_page); -void pagecache_unlock(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN first_REDO_LSN_for_page); -void pagecache_unpin_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno); -void pagecache_unpin(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link); +extern void pagecache_unlock_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page); +extern void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page); +extern void pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno); +extern void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link); extern int flush_pagecache_blocks(PAGECACHE *keycache, PAGECACHE_FILE *file, enum flush_type type); -my_bool pagecache_delete_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - enum pagecache_page_lock lock, - my_bool flush); +extern my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush); extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); C_MODE_END diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4fa814d8188..e1ccab9e06f 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2000-2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,11 +26,11 @@ When a new block is required it is first tried to pop one from the stack. If the stack is empty, it is tried to get a never-used block from the pool. If this is empty too, then a block is taken from the LRU ring, flushing it - to disk, if neccessary. This is handled in find_key_block(). + to disk, if necessary. This is handled in find_key_block(). With the new free list, the blocks can have three temperatures: hot, warm and cold (which is free). This is remembered in the block header by the enum BLOCK_TEMPERATURE temperature variable. Remembering the - temperature is neccessary to correctly count the number of warm blocks, + temperature is necessary to correctly count the number of warm blocks, which is required to decide when blocks are allowed to become hot. Whenever a block is inserted to another (sub-)chain, we take the old and new temperature into account to decide if we got one more or less warm block. @@ -93,16 +93,22 @@ */ #define SERIALIZED_READ_FROM_CACHE yes -#define BLOCK_INFO(B) DBUG_PRINT("info", \ - ("block 0x%lx, file %lu, page %lu, s %0x", \ - (ulong)(B), \ - (ulong)((B)->hash_link ? \ - (B)->hash_link->file.file : \ - 0), \ - (ulong)((B)->hash_link ? \ - (B)->hash_link->pageno : \ - 0), \ - (B)->status)) +#define BLOCK_INFO(B) \ + DBUG_PRINT("info", \ + ("block 0x%lx, file %lu, page %lu, s %0x, hshL 0x%lx, req %u/%u", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status, \ + (ulong)(B)->hash_link, \ + (uint) (B)->requests, \ + (uint)((B)->hash_link ? \ + (B)->hash_link->requests : \ + 0))) /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; @@ -147,7 +153,7 @@ struct st_pagecache_hash_link }; /* simple states of a block */ -#define BLOCK_ERROR 1 /* an error occured when performing disk i/o */ +#define BLOCK_ERROR 1 /* an error occurred when performing disk i/o */ #define BLOCK_READ 2 /* the is page in the block buffer */ #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ #define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ @@ -211,7 +217,20 @@ typedef struct st_pagecache_lock_info struct st_my_thread_var *thread; my_bool write_lock; } PAGECACHE_LOCK_INFO; -/* service functions */ + + +/* service functions maintain debugging info about pin & lock */ + + +/* + Links information about thread pinned/locked the block to the list + + SYNOPSIS + info_link() + list the list to link in + node the node which should be linked +*/ + void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) { if ((node->next= *list)) @@ -219,11 +238,38 @@ void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) *list= node; node->prev= list; } + + +/* + Unlinks information about thread pinned/locked the block from the list + + SYNOPSIS + info_unlink() + node the node which should be unlinked +*/ + void info_unlink(PAGECACHE_PIN_INFO *node) { if ((*node->prev= node->next)) node->next->prev= node->prev; } + + +/* + Finds information about given thread in the list of threads which + pinned/locked this block. + + SYNOPSIS + info_find() + list the list where to find the thread + thread thread ID (reference to the st_my_thread_var + of the thread) + + RETURN + 0 - the thread was not found + pointer to the information node of the thread in the list +*/ + PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, struct st_my_thread_var *thread) { @@ -263,8 +309,8 @@ struct st_pagecache_block_link #ifdef PAGECACHE_DEBUG /* debug checks */ -bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) +my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_pin"); @@ -305,9 +351,10 @@ bool info_check_pin(PAGECACHE_BLOCK_LINK *block, } DBUG_RETURN(0); } -bool info_check_lock(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) + +my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_lock"); @@ -328,7 +375,7 @@ bool info_check_lock(PAGECACHE_BLOCK_LINK *block, break; case PAGECACHE_LOCK_LEFT_READLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN_LEFT_UNPINNED); + pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || info->write_lock) { DBUG_PRINT("info", @@ -650,6 +697,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->shift= my_bit_log2(block_size); DBUG_PRINT("info", ("block_size: %u", block_size)); + DBUG_ASSERT((1 << pagecache->shift) == block_size); blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + 2 * sizeof(PAGECACHE_HASH_LINK) + @@ -755,7 +803,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, } pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0; - DBUG_RETURN((uint) pagecache->disk_blocks); + DBUG_RETURN((uint) pagecache->blocks); err: error= my_errno; @@ -945,7 +993,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, /* - Remove page cache from memory + Flushes and removes page cache from memory SYNOPSIS end_pagecache() @@ -1240,7 +1288,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, my_bool hot, my_bool at_end) { PAGECACHE_BLOCK_LINK *ins; - PAGECACHE_BLOCK_LINK **pins; + PAGECACHE_BLOCK_LINK **ptr_ins; KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); #ifdef THREAD @@ -1285,8 +1333,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); /* Condition not transformed using DeMorgan, to keep the text identical */ #endif /* THREAD */ - pins= hot ? &pagecache->used_ins : &pagecache->used_last; - ins= *pins; + ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last; + ins= *ptr_ins; if (ins) { ins->next_used->prev_used= &block->next_used; @@ -1294,7 +1342,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, block->prev_used= &ins->next_used; ins->next_used= block; if (at_end) - *pins= block; + *ptr_ins= block; } else { @@ -1363,6 +1411,16 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* Register requests for a block + + SYNOPSIS + reg_requests() + pagecache this page cache reference + block the block we request reference + count how many requests we register (it is 1 everywhere) + + NOTE + Registration of request means we are going to use this block so we exclude + it from the LRU if it is first request */ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, int count) @@ -1375,7 +1433,7 @@ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, if (! block->requests) /* First request for the block unlinks it */ unlink_block(pagecache, block); - block->requests+=count; + block->requests+= count; DBUG_VOID_RETURN; } @@ -1461,8 +1519,11 @@ static void unreg_request(PAGECACHE *pagecache, static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { + DBUG_ENTER("remove_reader"); + BLOCK_INFO(block); if (! --block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); + DBUG_VOID_RETURN; } @@ -1563,8 +1624,21 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) hash_link->next= pagecache->free_hash_list; pagecache->free_hash_list= hash_link; } + + /* - Get the hash link for the page if it is inthe cache + Get the hash link for the page if it is in the cache + + SYNOPSIS + get_present_hash_link() + pagecache Pagecache reference + file file ID + pageno page number in the file + start where to put pointer to found hash link (for + direct referring it) + + RETURN + found hashlink pointer */ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, @@ -1766,8 +1840,8 @@ restart: /* Remove block to invalidate the page in the block buffer as we are going to write directly on disk. - Although we have an exlusive lock for the updated key part - the control can be yieded by the current thread as we might + Although we have an exclusive lock for the updated key part + the control can be yielded by the current thread as we might have unfinished readers of other key parts in the block buffer. Still we are guaranteed not to have any readers of the key part we are writing into until the block is @@ -1777,7 +1851,7 @@ restart: free_block(pagecache, block); return 0; } - /* Wait intil the page is flushed on disk */ + /* Wait until the page is flushed on disk */ hash_link->requests--; { #ifdef THREAD @@ -1943,12 +2017,17 @@ restart: reg_requests(pagecache, block,1); hash_link->block= block; } + else + { + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + } if (block->hash_link != hash_link && ! (block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ - block->status|= BLOCK_IN_SWITCH; + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); KEYCACHE_DBUG_PRINT("find_key_block", ("got block %u for new page", @@ -1995,7 +2074,7 @@ restart: } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + BLOCK_INFO(block); block->status= error? BLOCK_ERROR : 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -2094,7 +2173,7 @@ void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) DBUG_VOID_RETURN; } #ifdef PAGECACHE_DEBUG -void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); @@ -2112,7 +2191,7 @@ void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) info_unlink((PAGECACHE_PIN_INFO *)info); my_free((gptr)info, MYF(0)); } -void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2144,7 +2223,7 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, { DBUG_ENTER("pagecache_lock_block"); BLOCK_INFO(block); - if (block->status & BLOCK_WRLOCK) + while (block->status & BLOCK_WRLOCK) { DBUG_PRINT("info", ("fail to lock, waiting...")); /* Lock failed we will wait */ @@ -2168,6 +2247,7 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, } /* we are doing it by global cache mutex protectio, so it is OK */ block->status|= BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); DBUG_RETURN(0); } @@ -2177,6 +2257,7 @@ void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); block->status&= ~BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); #ifdef THREAD /* release all threads waiting for write lock */ if (block->wqueue[COND_FOR_WRLOCK].last_thread) @@ -2434,6 +2515,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); if (stamp_this_page) { @@ -2776,17 +2858,18 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; - block= find_key_block(pagecache, file, pageno, level, 0, - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 0 : 1), - &page_st); + block= find_key_block(pagecache, file, pageno, level, + ((lock == PAGECACHE_LOCK_WRITE) ? 1 : 0), + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 0 : 1), + &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); block->type= type; if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) { /* - We failed to writelock the block, cache is unlocked, and last write + We failed to write lock the block, cache is unlocked, and last write lock is released, we will try to get the block again. */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); @@ -3095,11 +3178,16 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_w_requests++; - block= find_key_block(pagecache, file, pageno, level, - (write_mode == PAGECACHE_WRITE_DONE ? 0 : 1), - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 0 : 1), - &page_st); + { + int need_wrlock= (write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ); + block= find_key_block(pagecache, file, pageno, level, + (need_wrlock ? 1 : 0), + (need_wrlock ? 1 : 0), + &page_st); + } if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); @@ -3178,9 +3266,15 @@ restart: } /* Unregister the request */ + block->hash_link->requests--; if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) - unreg_request(pagecache, block, 1); + { + if (write_mode != PAGECACHE_WRITE_DONE) + { + unreg_request(pagecache, block, 1); + } + } else *link= (PAGECACHE_PAGE_LINK)block; diff --git a/mysys/test_pagecache_consist.c b/mysys/test_pagecache_consist.c index 1cc54af2460..86698fbb860 100755 --- a/mysys/test_pagecache_consist.c +++ b/mysys/test_pagecache_consist.c @@ -11,7 +11,7 @@ #include /*#define PAGE_SIZE 65536*/ -#define PCACHE_SIZE (PAGE_SIZE*1024*20) +#define PCACHE_SIZE (PAGE_SIZE*1024*8) #ifndef DBUG_OFF static const char* default_dbug_option; @@ -59,7 +59,7 @@ static uint flush_divider= 1000; #endif /*TEST_HIGH_CONCURENCY*/ -/* check page consistemcy */ +/* check page consistency */ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, int tag) { From e2cddea9b5ca3ad796f3b6b77d33c1591fc30a98 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 14 Nov 2006 20:27:38 +0200 Subject: [PATCH 57/95] make it possible to use the tree on caseinsensitive file systems BitKeeper/deleted/.del-cmakelists_.txt: Delete: storage/maria/cmakelists_.txt --- storage/maria/cmakelists.txt | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 storage/maria/cmakelists.txt diff --git a/storage/maria/cmakelists.txt b/storage/maria/cmakelists.txt deleted file mode 100644 index 3ba7aba4555..00000000000 --- a/storage/maria/cmakelists.txt +++ /dev/null @@ -1,26 +0,0 @@ -SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") - -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) -ADD_LIBRARY(myisam ft_boolean_search.c ft_nlq_search.c ft_parser.c ft_static.c ft_stem.c - ft_stopwords.c ft_update.c mi_cache.c mi_changed.c mi_check.c - mi_checksum.c mi_close.c mi_create.c mi_dbug.c mi_delete.c - mi_delete_all.c mi_delete_table.c mi_dynrec.c mi_extra.c mi_info.c - mi_key.c mi_keycache.c mi_locking.c mi_log.c mi_open.c - mi_packrec.c mi_page.c mi_panic.c mi_preload.c mi_range.c mi_rename.c - mi_rfirst.c mi_rlast.c mi_rnext.c mi_rnext_same.c mi_rprev.c mi_rrnd.c - mi_rsame.c mi_rsamepos.c mi_scan.c mi_search.c mi_static.c mi_statrec.c - mi_unique.c mi_update.c mi_write.c rt_index.c rt_key.c rt_mbr.c - rt_split.c sort.c sp_key.c ft_eval.h myisamdef.h rt_index.h mi_rkey.c) - -ADD_EXECUTABLE(myisam_ftdump myisam_ftdump.c) -TARGET_LINK_LIBRARIES(myisam_ftdump myisam mysys dbug strings zlib wsock32) - -ADD_EXECUTABLE(myisamchk myisamchk.c) -TARGET_LINK_LIBRARIES(myisamchk myisam mysys dbug strings zlib wsock32) - -ADD_EXECUTABLE(myisamlog myisamlog.c) -TARGET_LINK_LIBRARIES(myisamlog myisam mysys dbug strings zlib wsock32) - -ADD_EXECUTABLE(myisampack myisampack.c) -TARGET_LINK_LIBRARIES(myisampack myisam mysys dbug strings zlib wsock32) From 9a06c5023676d3750e54c5ef91e5b7ca4feb0fea Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 15 Nov 2006 12:58:37 +0200 Subject: [PATCH 58/95] fixed typo storage/maria/CMakeLists.txt: Rename: storage/maria/_MakeLists.txt -> storage/maria/CMakeLists.txt --- storage/maria/{_MakeLists.txt => CMakeLists.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename storage/maria/{_MakeLists.txt => CMakeLists.txt} (100%) diff --git a/storage/maria/_MakeLists.txt b/storage/maria/CMakeLists.txt similarity index 100% rename from storage/maria/_MakeLists.txt rename to storage/maria/CMakeLists.txt From 915cebdd53fe5071dc9443a236a798764f504c22 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 15:40:08 +0100 Subject: [PATCH 59/95] post-review fixes. tablockman: fixed a bug in finding a blocker lock mysys/my_getsystime.c: this is no longer true storage/maria/lockman.h: post-review fixes storage/maria/tablockman.h: post-review fixes storage/maria/unittest/lockman-t.c: post-review fixes storage/maria/unittest/lockman1-t.c: post-review fixes storage/maria/unittest/lockman2-t.c: post-review fixes include/my_atomic.h: moved intptr definition to my_global.h storage/maria/tablockman.c: post-review fixes BUILD/SETUP.sh: add -DMY_LF_EXTRA_DEBUG to debug builds include/atomic/nolock.h: suppress warning include/my_global.h: suppress warning mysys/lf_alloc-pin.c: post-review fixes mysys/lf_dynarray.c: post-review fixes mysys/lf_hash.c: post-review fixes storage/maria/trnman.c: suppress warning include/lf.h: post-review fix --- BUILD/SETUP.sh | 3 +- include/atomic/nolock.h | 2 +- include/lf.h | 47 ++-- include/my_atomic.h | 8 - include/my_global.h | 11 +- mysys/lf_alloc-pin.c | 29 +-- mysys/lf_dynarray.c | 35 ++- mysys/lf_hash.c | 119 ++++----- mysys/my_getsystime.c | 4 - storage/maria/lockman.h | 7 +- storage/maria/tablockman.c | 373 ++++++++++++++++++---------- storage/maria/tablockman.h | 52 ++-- storage/maria/trnman.c | 3 +- storage/maria/unittest/lockman-t.c | 4 + storage/maria/unittest/lockman1-t.c | 23 +- storage/maria/unittest/lockman2-t.c | 54 ++-- 16 files changed, 472 insertions(+), 302 deletions(-) diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index f5fc16d9c9e..4677087bc3b 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -117,8 +117,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify " valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max" # # Used in -debug builds -debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS " +debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS" debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX" +debug_cflags="$debug_cflags -DMY_LF_EXTRA_DEBUG" error_inject="--with-error-inject " # # Base C++ flags for all builds diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index a696e008f03..9d367657a69 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -32,7 +32,7 @@ #ifdef make_atomic_cas_body -typedef struct { } my_atomic_rwlock_t; +typedef struct { } my_atomic_rwlock_t __attribute__ ((unused)); #define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_init(name) #define my_atomic_rwlock_rdlock(name) diff --git a/include/lf.h b/include/lf.h index 3a3b5b28ade..c7198ab75b6 100644 --- a/include/lf.h +++ b/include/lf.h @@ -24,7 +24,7 @@ func() is a _func() protected by my_atomic_rwlock_wrlock() */ -#define lock_wrap(f,t,proto_args, args, lock) \ +#define lock_wrap(f, t, proto_args, args, lock) \ t _ ## f proto_args; \ static inline t f proto_args \ { \ @@ -35,7 +35,7 @@ static inline t f proto_args \ return ret; \ } -#define lock_wrap_void(f,proto_args, args, lock) \ +#define lock_wrap_void(f, proto_args, args, lock) \ void _ ## f proto_args; \ static inline void f proto_args \ { \ @@ -44,14 +44,14 @@ static inline void f proto_args \ my_atomic_rwlock_wrunlock(lock); \ } -#define nolock_wrap(f,t,proto_args, args) \ +#define nolock_wrap(f, t, proto_args, args) \ t _ ## f proto_args; \ static inline t f proto_args \ { \ return _ ## f args; \ } -#define nolock_wrap_void(f,proto_args, args) \ +#define nolock_wrap_void(f, proto_args, args) \ void _ ## f proto_args; \ static inline void f proto_args \ { \ @@ -80,14 +80,14 @@ void lf_dynarray_destroy(LF_DYNARRAY *array); nolock_wrap(lf_dynarray_value, void *, (LF_DYNARRAY *array, uint idx), - (array,idx)); + (array, idx)); lock_wrap(lf_dynarray_lvalue, void *, (LF_DYNARRAY *array, uint idx), - (array,idx), + (array, idx), &array->lock); nolock_wrap(lf_dynarray_iterate, int, (LF_DYNARRAY *array, lf_dynarray_func func, void *arg), - (array,func,arg)); + (array, func, arg)); /* pin manager for memory allocator, lf_alloc-pin.c @@ -115,9 +115,14 @@ typedef struct { uint32 volatile link; /* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ char pad[128-sizeof(uint32)*2 - -sizeof(void *)*(LF_PINBOX_PINS+2)]; + -sizeof(LF_PINBOX *) + -sizeof(void *)*(LF_PINBOX_PINS+1)]; } LF_PINS; +/* + shortcut macros to do an atomic_wrlock on a structure that uses pins + (e.g. lf_hash). +*/ #define lf_rwlock_by_pins(PINS) \ my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock) #define lf_rwunlock_by_pins(PINS) \ @@ -131,11 +136,11 @@ typedef struct { #if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) #define LF_REQUIRE_PINS(N) \ static const char require_pins[LF_PINBOX_PINS-N]; \ - static const int LF_NUM_PINS_IN_THIS_FILE=N; + static const int LF_NUM_PINS_IN_THIS_FILE= N; #define _lf_pin(PINS, PIN, ADDR) \ ( \ - my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)), \ - assert(PIN < LF_NUM_PINS_IN_THIS_FILE) \ + assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \ + my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \ ) #else #define LF_REQUIRE_PINS(N) @@ -151,7 +156,7 @@ typedef struct { } while (0) #define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) #define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) -#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN]==0) +#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0) void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, lf_pinbox_free_func *free_func, void * free_func_arg); @@ -167,16 +172,20 @@ lock_wrap_void(lf_pinbox_put_pins, &pins->pinbox->pinstack.lock); lock_wrap_void(lf_pinbox_free, (LF_PINS *pins, void *addr), - (pins,addr), + (pins, addr), &pins->pinbox->pinstack.lock); /* memory allocator, lf_alloc-pin.c */ +struct st_lf_alloc_node { + struct st_lf_alloc_node *next; +}; + typedef struct st_lf_allocator { LF_PINBOX pinbox; - void * volatile top; + struct st_lf_alloc_node * volatile top; uint element_size; uint32 volatile mallocs; } LF_ALLOCATOR; @@ -184,13 +193,17 @@ typedef struct st_lf_allocator { void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset); void lf_alloc_destroy(LF_ALLOCATOR *allocator); uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); +/* + shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ #define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) #define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) #define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox) #define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox) #define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS) #define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) -#define lf_alloc_real_free(ALLOC,ADDR) my_free((gptr)(ADDR), MYF(0)) +#define lf_alloc_real_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0)) lock_wrap(lf_alloc_new, void *, (LF_PINS *pins), @@ -222,6 +235,10 @@ void lf_hash_destroy(LF_HASH *hash); int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +/* + shortcut macros to access underlying pinbox functions from an LF_HASH + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ #define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc) #define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc) #define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) diff --git a/include/my_atomic.h b/include/my_atomic.h index 8efad2802c0..78121c59c40 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -173,14 +173,6 @@ make_atomic_fas(ptr) #define LF_BACKOFF (1) #endif -#if SIZEOF_CHARP == SIZEOF_INT -typedef int intptr; -#elif SIZEOF_CHARP == SIZEOF_LONG -typedef long intptr; -#else -#error -#endif - #define MY_ATOMIC_OK 0 #define MY_ATOMIC_NOT_1CPU 1 extern int my_atomic_initialize(); diff --git a/include/my_global.h b/include/my_global.h index 7f7b502e1a7..6831a76c945 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -432,7 +432,8 @@ C_MODE_END #define compile_time_assert(X) \ do \ { \ - char compile_time_assert[(X) ? 1 : -1]; \ + char compile_time_assert[(X) ? 1 : -1] \ + __attribute__ ((unused)); \ } while(0) /* Go around some bugs in different OS and compilers */ @@ -964,6 +965,14 @@ typedef unsigned __int64 my_ulonglong; typedef unsigned long long my_ulonglong; #endif +#if SIZEOF_CHARP == SIZEOF_INT +typedef int intptr; +#elif SIZEOF_CHARP == SIZEOF_LONG +typedef long intptr; +#else +#error +#endif + #ifdef USE_RAID /* The following is done with a if to not get problems with pre-processors diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index b96fe42311b..43055766c3e 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -91,7 +91,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins); See the latter for details. */ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, - lf_pinbox_free_func *free_func,void *free_func_arg) + lf_pinbox_free_func *free_func, void *free_func_arg) { DBUG_ASSERT(sizeof(LF_PINS) == 128); DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); @@ -306,7 +306,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { if (addr) /* use binary search */ { - void **a,**b,**c; + void **a, **b, **c; for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) if (cur == *c) a= b= c; @@ -337,13 +337,13 @@ found: callback for _lf_pinbox_real_free to free an unpinned object - add it back to the allocator stack */ -static void alloc_free(void *node, LF_ALLOCATOR *allocator) +static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator) { - void *tmp; + struct st_lf_alloc_node *tmp; tmp= allocator->top; do { - (*(void **)node)= tmp; + node->next= tmp; } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && LF_BACKOFF); } @@ -379,12 +379,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) */ void lf_alloc_destroy(LF_ALLOCATOR *allocator) { - void *el= allocator->top; - while (el) + struct st_lf_alloc_node *node= allocator->top; + while (node) { - void *tmp= *(void **)el; - my_free(el, MYF(0)); - el= tmp; + struct st_lf_alloc_node *tmp= node->next; + my_free((void *)node, MYF(0)); + node= tmp; } lf_pinbox_destroy(&allocator->pinbox); allocator->top= 0; @@ -400,7 +400,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator) void *_lf_alloc_new(LF_PINS *pins) { LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); - void *node; + struct st_lf_alloc_node *node; for (;;) { do @@ -410,7 +410,8 @@ void *_lf_alloc_new(LF_PINS *pins) } while (node != allocator->top && LF_BACKOFF); if (!node) { - if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + if (!(node= (void *)my_malloc(allocator->element_size, + MYF(MY_WME|MY_ZEROFILL)))) break; #ifdef MY_LF_EXTRA_DEBUG my_atomic_add32(&allocator->mallocs, 1); @@ -434,8 +435,8 @@ void *_lf_alloc_new(LF_PINS *pins) uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { uint i; - void *node; - for (node= allocator->top, i= 0; node; node= *(void **)node, i++) + struct st_lf_alloc_node *node; + for (node= allocator->top, i= 0; node; node= node->next, i++) /* no op */; return i; } diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index d63af91813e..c6dd654bf03 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -19,9 +19,9 @@ (so no pointer into the array may ever become invalid). Memory is allocated in non-contiguous chunks. - This data structure is not space efficient for sparce arrays. + This data structure is not space efficient for sparse arrays. - The number of elements is limited to 2^16 + The number of elements is limited to 4311810304 Every element is aligned to sizeof(element) boundary (to avoid false sharing if element is big enough). @@ -49,7 +49,8 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) static void recursive_free(void **alloc, int level) { - if (!alloc) return; + if (!alloc) + return; if (level) { @@ -68,10 +69,9 @@ void lf_dynarray_destroy(LF_DYNARRAY *array) for (i= 0; i < LF_DYNARRAY_LEVELS; i++) recursive_free(array->level[i], i); my_atomic_rwlock_destroy(&array->lock); - bzero(array, sizeof(*array)); } -static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, @@ -82,6 +82,15 @@ static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH }; +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH, +}; + /* Returns a valid lvalue pointer to the element number 'idx'. Allocates memory if necessary. @@ -91,16 +100,17 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) void * ptr, * volatile * ptr_ptr= 0; int i; - for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; ptr_ptr= &array->level[i]; - idx-= dynarray_idxes_in_prev_level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; for (; i > 0; i--) { if (!(ptr= *ptr_ptr)) { void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), - MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) return(NULL); if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) ptr= alloc; @@ -116,7 +126,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + max(array->size_of_element, sizeof(void *)), MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) + if (unlikely(!alloc)) return(NULL); /* reserve the space for free() address */ data= alloc + sizeof(void *); @@ -143,9 +153,10 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) void * ptr, * volatile * ptr_ptr= 0; int i; - for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; ptr_ptr= &array->level[i]; - idx-= dynarray_idxes_in_prev_level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; for (; i > 0; i--) { if (!(ptr= *ptr_ptr)) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 66ad672f345..ff0eb8326d5 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -23,6 +23,7 @@ (but how to do it in lf_hash_delete ?) */ #include +#include #include #include #include @@ -33,7 +34,7 @@ LF_REQUIRE_PINS(3); typedef struct { intptr volatile link; /* a pointer to the next element in a listand a flag */ uint32 hashnr; /* reversed hash number, for sorting */ - const uchar *key; + const byte *key; uint keylen; } LF_SLIST; @@ -67,31 +68,31 @@ typedef struct { pins[0..2] are used, they are NOT removed on return */ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) + const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; - const uchar *cur_key; + const byte *cur_key; uint cur_keylen; intptr link; retry: - cursor->prev=(intptr *)head; + cursor->prev= (intptr *)head; do { - cursor->curr=PTR(*cursor->prev); - _lf_pin(pins,1,cursor->curr); + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); for (;;) { if (!cursor->curr) return 0; do { // XXX or goto retry ? - link=cursor->curr->link; - cursor->next=PTR(link); + link= cursor->curr->link; + cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); } while(link != cursor->curr->link && LF_BACKOFF); - cur_hashnr=cursor->curr->hashnr; - cur_key=cursor->curr->key; - cur_keylen=cursor->curr->keylen; + cur_hashnr= cursor->curr->hashnr; + cur_key= cursor->curr->key; + cur_keylen= cursor->curr->keylen; if (*cursor->prev != (intptr)cursor->curr) { LF_BACKOFF; @@ -101,12 +102,12 @@ retry: { if (cur_hashnr >= hashnr) { - int r=1; + int r= 1; if (cur_hashnr > hashnr || - (r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) + (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) return !r; } - cursor->prev=&(cursor->curr->link); + cursor->prev= &(cursor->curr->link); _lf_pin(pins, 2, cursor->curr); } else @@ -120,7 +121,7 @@ retry: goto retry; } } - cursor->curr=cursor->next; + cursor->curr= cursor->next; _lf_pin(pins, 1, cursor->curr); } } @@ -141,21 +142,21 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, LF_SLIST *node, LF_PINS *pins, uint flags) { CURSOR cursor; - int res=-1; + int res= -1; do { if (lfind(head, cs, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) - res=0; /* duplicate found */ + res= 0; /* duplicate found */ else { - node->link=(intptr)cursor.curr; + node->link= (intptr)cursor.curr; assert(node->link != (intptr)node); assert(cursor.prev != &node->link); if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) - res=1; /* inserted ok */ + res= 1; /* inserted ok */ } } while (res == -1); _lf_unpin(pins, 0); @@ -177,10 +178,10 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, it uses pins[0..2], on return all pins are removed. */ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const uchar *key, uint keylen, LF_PINS *pins) + const byte *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=-1; + int res= -1; do { @@ -218,30 +219,30 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, all other pins are removed. */ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, - uint32 hashnr, const uchar *key, uint keylen, + uint32 hashnr, const byte *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins); + int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); return res ? cursor.curr : 0; } -static inline const uchar* hash_key(const LF_HASH *hash, - const uchar *record, uint *length) +static inline const byte* hash_key(const LF_HASH *hash, + const byte *record, uint *length) { if (hash->get_key) - return (*hash->get_key)(record,length,0); - *length=hash->key_length; + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; return record + hash->key_offset; } -static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) +static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) { - ulong nr1=1, nr2=4; - hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2); + ulong nr1= 1, nr2= 4; + hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2); return nr1 & INT_MAX32; } @@ -258,28 +259,28 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); - hash->size=1; - hash->count=0; - hash->element_size=element_size; - hash->flags=flags; - hash->charset=charset ? charset : &my_charset_bin; - hash->key_offset=key_offset; - hash->key_length=key_length; - hash->get_key=get_key; + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); } void lf_hash_destroy(LF_HASH *hash) { - LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); while (el) { - intptr next=el->link; + intptr next= el->link; if (el->hashnr & 1) lf_alloc_real_free(&hash->alloc, el); else my_free((void *)el, MYF(0)); - el=(LF_SLIST *)next; + el= (LF_SLIST *)next; } lf_alloc_destroy(&hash->alloc); lf_dynarray_destroy(&hash->array); @@ -299,19 +300,19 @@ void lf_hash_destroy(LF_HASH *hash) */ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) { - uint csize, bucket, hashnr; + int csize, bucket, hashnr; LF_SLIST *node, * volatile *el; lf_rwlock_by_pins(pins); - node=(LF_SLIST *)_lf_alloc_new(pins); + node= (LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); - node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); + node->key= hash_key(hash, (byte *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, node->keylen); bucket= hashnr % hash->size; - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - node->hashnr=my_reverse_bits(hashnr) | 1; + node->hashnr= my_reverse_bits(hashnr) | 1; if (linsert(el, hash->charset, node, pins, hash->flags)) { _lf_alloc_free(pins, node); @@ -335,15 +336,15 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, - (uchar *)key, keylen, pins)) + (byte *)key, keylen, pins)) { lf_rwunlock_by_pins(pins); return 1; @@ -360,33 +361,33 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el, *found; - uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, - (uchar *)key, keylen, pins); + (byte *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? found+1 : 0; } -static char *dummy_key=""; +static char *dummy_key= ""; static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) { uint parent= my_clear_highest_bit(bucket); - LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); - LF_SLIST **tmp=0, *cur; - LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); - dummy->hashnr=my_reverse_bits(bucket); - dummy->key=dummy_key; - dummy->keylen=0; + dummy->hashnr= my_reverse_bits(bucket); + dummy->key= dummy_key; + dummy->keylen= 0; if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { my_free((void *)dummy, MYF(0)); diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 91c977f0b5a..d1ed4f2ec92 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -35,10 +35,6 @@ ulonglong my_getsystime() LARGE_INTEGER t_cnt; if (!offset) { - /* strictly speaking there should be a mutex to protect - initialization section. But my_getsystime() is called from - UUID() code, and UUID() calls are serialized with a mutex anyway - */ LARGE_INTEGER li; FILETIME ft; GetSystemTimeAsFileTime(&ft); diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h index 6577a5e80fc..fd96f8930d5 100644 --- a/storage/maria/lockman.h +++ b/storage/maria/lockman.h @@ -32,7 +32,7 @@ SLX - Shared + Loose eXclusive LSIX - Loose Shared + Intention eXclusive */ -enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; struct lockman_lock; @@ -55,9 +55,10 @@ typedef struct { uint lock_timeout; loid_to_lo_func *loid_to_lo; } LOCKMAN; - +#define DIDNT_GET_THE_LOCK 0 enum lockman_getlock_result { - DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE }; diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c index 608f852cb7b..b4cf77401bb 100644 --- a/storage/maria/tablockman.c +++ b/storage/maria/tablockman.c @@ -1,5 +1,5 @@ -// TODO - allocate everything from dynarrays !!! (benchmark) -// automatically place S instead of LS if possible +#warning TODO - allocate everything from dynarrays !!! (benchmark) +#warning automatically place S instead of LS if possible /* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify @@ -16,10 +16,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include -#include -#include +#include +#include #include "tablockman.h" /* @@ -53,45 +51,54 @@ resource, check if a conflicting lock exists, if yes - find who owns it. Solution: every resource has a structure with - 1. Hash of "active" (see below for the description of "active") granted - locks with loid as a key. Thus, checking if a given transaction has a - lock on this resource is O(1) operation. + 1. Hash of latest (see the lock upgrade section below) granted locks with + loid as a key. Thus, checking if a given transaction has a lock on + this resource is O(1) operation. 2. Doubly-linked lists of all granted locks - one list for every lock type. Thus, checking if a conflicting lock exists is a check whether an appropriate list head pointer is not null, also O(1). 3. Every lock has a loid of the owner, thus checking who owns a conflicting lock is also O(1). - 4. Deque of waiting locks. It's a deque not a fifo, because for lock - upgrades requests are added to the queue head, not tail. There's never - a need to scan the queue. - - Result: adding or removing a lock is always a O(1) operation, it does not - depend on the number of locks on the resource, or number of transactions, - or number of resources. It _does_ depend on the number of different lock - levels - O(number_of_lock_levels) - but it's a constant. + 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo, + because for lock upgrades requests are added to the queue head, not + tail. This is a single place where there it gets O(N) on number + of locks - when a transaction wakes up from waiting on a condition, + it may need to scan the queue backward to the beginning to find + a conflicting lock. It is guaranteed though that "all transactions + before it" received the same - or earlier - signal. In other words a + transaction needs to scan all transactions before it that received the + signal but didn't have a chance to resume the execution yet, so + practically OS scheduler won't let the scan to be O(N). Waiting: if there is a conflicting lock or if wait queue is not empty, a requested lock cannot be granted at once. It is added to the end of the - wait queue. If there is a conflicting lock - the "blocker" transaction is - the owner of this lock. If there's no conflict but a queue was not empty, - than the "blocker" is the transaction that the owner of the lock at the - end of the queue is waiting for (in other words, our lock is added to the - end of the wait queue, and our blocker is the same as of the lock right - before us). + wait queue. If a queue was empty and there is a conflicting lock - the + "blocker" transaction is the owner of this lock. If a queue is not empty, + an owner of the previous lock in the queue is the "blocker". But if the + previous lock is compatible with the request, then the "blocker" is the + transaction that the owner of the lock at the end of the queue is waiting + for (in other words, our lock is added to the end of the wait queue, and + our blocker is the same as of the lock right before us). Lock upgrades: when a thread that has a lock on a given resource, requests a new lock on the same resource and the old lock is not enough to satisfy new lock requirements (which is defined by lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock - (defineded by lock_combining_matrix as above) is placed. Depending on - other granted locks it is immediately active or it has to wait. Here the + (defined by lock_combining_matrix as above) is placed. Depending on + other granted locks it is immediately granted or it has to wait. Here the lock is added to the start of the waiting queue, not to the end. Old lock, is removed from the hash, but not from the doubly-linked lists. (indeed, a transaction checks "do I have a lock on this resource ?" by looking in a hash, and it should find a latest lock, so old locks must be - removed; but a transaction checks "are the conflicting locks ?" by + removed; but a transaction checks "are there conflicting locks ?" by checking doubly-linked lists, it doesn't matter if it will find an old lock - if it would be removed, a new lock would be also a conflict). + So, a hash contains only "latest" locks - there can be only one latest + lock per resource per transaction. But doubly-linked lists contain all + locks, even "obsolete" ones, because it doesnt't hurt. Note that old + locks can not be freed early, in particular they stay in the + 'active_locks' list of a lock owner, because they may be "re-enabled" + on a savepoint rollback. To better support table-row relations where one needs to lock the table with an intention lock before locking the row, extended diagnostics is @@ -107,6 +114,18 @@ Instant duration locks are not supported. Though they're trivial to add, they are normally only used on rows, not on tables. So, presumably, they are not needed here. + + Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes + (TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex). + table mutex protects operations on the table lock structures, and lock + owner pointers waiting_for and waiting_for_loid. + lock owner mutex is only used to wait on lock owner condition + (TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock + structures, and only lock owner itself may access them. + The pool mutex protects a pool of unused locks. Note the locking order: + first the table mutex, then the owner mutex or a pool mutex. + Table mutex lock cannot be attempted when owner or pool mutex are locked. + No mutex lock can be attempted if owner or pool mutex are locked. */ /* @@ -122,9 +141,9 @@ 0 - incompatible -1 - "impossible", so that we can assert the impossibility. */ -static int lock_compatibility_matrix[10][10]= +static const int lock_compatibility_matrix[10][10]= { /* N S X IS IX SIX LS LX SLX LSIX */ - { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ @@ -144,18 +163,18 @@ static int lock_compatibility_matrix[10][10]= One should never get N from it, we assert the impossibility */ -static enum lock_type lock_combining_matrix[10][10]= +static const enum lock_type lock_combining_matrix[10][10]= {/* N S X IS IX SIX LS LX SLX LSIX */ - { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ - { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ - { X, X, X, X, X, X, X, X, X, X}, /* X */ - { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ - { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ - { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ - { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ - { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ - { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ - { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ + { N, N, N, N, N, N, N, N, N, N}, /* N */ + { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { N, X, X, X, X, X, X, X, X, X}, /* X */ + { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ }; /* @@ -176,7 +195,7 @@ static enum lock_type lock_combining_matrix[10][10]= #define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE #define A GOT_THE_LOCK #define x GOT_THE_LOCK -static enum lockman_getlock_result getlock_result[10][10]= +static const enum lockman_getlock_result getlock_result[10][10]= {/* N S X IS IX SIX LS LX SLX LSIX */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ @@ -200,37 +219,47 @@ static enum lockman_getlock_result getlock_result[10][10]= */ struct st_table_lock { +#warning do we need upgraded_from ? struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; struct st_locked_table *table; uint16 loid; - char lock_type; + uchar lock_type; }; #define hash_insert my_hash_insert /* for consistency :) */ -#define remove_from_wait_queue(LOCK, TABLE) \ - do \ - { \ - if ((LOCK)->prev) \ - { \ - DBUG_ASSERT((TABLE)->wait_queue_out != (LOCK)); \ - (LOCK)->prev->next= (LOCK)->next; \ - } \ - else \ - { \ - DBUG_ASSERT((TABLE)->wait_queue_out == (LOCK)); \ - (TABLE)->wait_queue_out= (LOCK)->next; \ - } \ - if ((LOCK)->next) \ - { \ - DBUG_ASSERT((TABLE)->wait_queue_in != (LOCK)); \ - (LOCK)->next->prev= (LOCK)->prev; \ - } \ - else \ - { \ - DBUG_ASSERT((TABLE)->wait_queue_in == (LOCK)); \ - (TABLE)->wait_queue_in= (LOCK)->prev; \ - } \ - } while (0) + +static inline +TABLE_LOCK *find_loid(LOCKED_TABLE *table, uint16 loid) +{ + return (TABLE_LOCK *)hash_search(& table->latest_locks, + (byte *)& loid, sizeof(loid)); +} + +static inline +void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table) +{ + DBUG_ASSERT(table == lock->table); + if (lock->prev) + { + DBUG_ASSERT(table->wait_queue_out != lock); + lock->prev->next= lock->next; + } + else + { + DBUG_ASSERT(table->wait_queue_out == lock); + table->wait_queue_out= lock->next; + } + if (lock->next) + { + DBUG_ASSERT(table->wait_queue_in != lock); + lock->next->prev= lock->prev; + } + else + { + DBUG_ASSERT(table->wait_queue_in == lock); + table->wait_queue_in= lock->prev; + } +} /* DESCRIPTION @@ -243,24 +272,31 @@ enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, LOCKED_TABLE *table, enum lock_type lock) { - TABLE_LOCK *old, *new, *blocker; + TABLE_LOCK *old, *new, *blocker, *blocker2; TABLE_LOCK_OWNER *wait_for; ulonglong deadline; struct timespec timeout; enum lock_type new_lock; + enum lockman_getlock_result res; int i; - pthread_mutex_lock(& table->mutex); - /* do we alreasy have a lock on this resource ? */ - old= (TABLE_LOCK *)hash_search(& table->active, (byte *)&lo->loid, - sizeof(lo->loid)); + DBUG_ASSERT(lo->waiting_lock == 0); + DBUG_ASSERT(lo->waiting_for == 0); + DBUG_ASSERT(lo->waiting_for_loid == 0); - /* and if yes, is it enough to satisfy the new request */ - if (old && lock_combining_matrix[old->lock_type][lock] == old->lock_type) + pthread_mutex_lock(& table->mutex); + /* do we already have a lock on this resource ? */ + old= find_loid(table, lo->loid); + + /* calculate the level of the upgraded lock, if yes */ + new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; + + /* and check if old lock is enough to satisfy the new request */ + if (old && new_lock == old->lock_type) { /* yes */ - pthread_mutex_unlock(& table->mutex); - return getlock_result[old->lock_type][lock]; + res= getlock_result[old->lock_type][lock]; + goto ret; } /* no, placing a new lock. first - take a free lock structure from the pool */ @@ -275,48 +311,81 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, { pthread_mutex_unlock(& lm->pool_mutex); new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME)); - if (!new) + if (unlikely(!new)) { - pthread_mutex_unlock(& table->mutex); - return DIDNT_GET_THE_LOCK; + res= NO_MEMORY_FOR_LOCK; + goto ret; } } - /* calculate the level of the upgraded lock */ - new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; - new->loid= lo->loid; new->lock_type= new_lock; new->table= table; /* and try to place it */ - for (new->prev= table->wait_queue_in ; ; ) + for (new->prev= table->wait_queue_in;;) { - /* waiting queue is not empty and we're not upgrading */ - if (!old && new->prev) + wait_for= 0; + if (!old) { - /* need to wait */ - DBUG_ASSERT(table->wait_queue_out); - DBUG_ASSERT(table->wait_queue_in); - blocker= new->prev; - /* wait for a previous lock in the queue or for a lock it's waiting for */ - if (lock_compatibility_matrix[blocker->lock_type][lock]) - wait_for= lm->loid_to_tlo(blocker->loid)->waiting_for; - else - wait_for= lm->loid_to_tlo(blocker->loid); + /* not upgrading - a lock must be added to the _end_ of the wait queue */ + for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev) + { + TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid); + + /* find a blocking lock */ + DBUG_ASSERT(table->wait_queue_out); + DBUG_ASSERT(table->wait_queue_in); + if (!lock_compatibility_matrix[blocker->lock_type][lock]) + { + /* found! */ + wait_for= tmp; + } + else + { + /* + hmm, the lock before doesn't block us, let's look one step further. + the condition below means: + + if we never waited on a condition yet + OR + the lock before ours (blocker) waits on a lock (blocker2) that is + present in the hash AND and conflicts with 'blocker' + + the condition after OR may fail if 'blocker2' was removed from + the hash, its signal woke us up, but 'blocker' itself didn't see + the signal yet. + */ + if (!lo->waiting_lock || + ((blocker2= find_loid(table, tmp->waiting_for_loid)) && + !lock_compatibility_matrix[blocker2->lock_type] + [blocker->lock_type])) + { + /* but it's waiting for a real lock. we'll wait for the same lock */ + wait_for= tmp->waiting_for; + } + /* + otherwise - a lock it's waiting for doesn't exist. + We've no choice but to scan the wait queue backwards, looking + for a conflicting lock or a lock waiting for a real lock. + QQ is there a way to avoid this scanning ? + */ + } + } } - else + + if (wait_for == 0) { /* checking for compatibility with existing locks */ for (blocker= 0, i= 0; i < LOCK_TYPES; i++) { if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) { - /* the first lock in the list may be our own - skip it */ - for (blocker= table->active_locks[i]; - blocker && blocker->loid == lo->loid; - blocker= blocker->next) /* no-op */; - if (blocker) + blocker= table->active_locks[i]; + /* if the first lock in the list is our own - skip it */ + if (blocker->loid == lo->loid) + blocker= blocker->next; + if (blocker) /* found a conflicting lock, need to wait */ break; } } @@ -327,6 +396,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, /* ok, we're here - the wait is inevitable */ lo->waiting_for= wait_for; + lo->waiting_for_loid= wait_for->loid; if (!lo->waiting_lock) /* first iteration of the for() loop */ { /* lock upgrade or new lock request ? */ @@ -338,7 +408,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, new->next->prev= new; table->wait_queue_out= new; if (!table->wait_queue_in) - table->wait_queue_in=table->wait_queue_out; + table->wait_queue_in= table->wait_queue_out; } else { @@ -348,7 +418,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, new->prev->next= new; table->wait_queue_in= new; if (!table->wait_queue_out) - table->wait_queue_out=table->wait_queue_in; + table->wait_queue_out= table->wait_queue_in; } lo->waiting_lock= new; @@ -356,22 +426,28 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, timeout.tv_sec= deadline/10000000; timeout.tv_nsec= (deadline % 10000000) * 100; } - else - { - if (my_getsystime() > deadline) - { - pthread_mutex_unlock(& table->mutex); - return DIDNT_GET_THE_LOCK; - } - } - /* now really wait */ + /* + prepare to wait. + we must lock blocker's mutex to wait on blocker's cond. + and we must release table's mutex. + note that blocker's mutex is locked _before_ table's mutex is released + */ pthread_mutex_lock(wait_for->mutex); pthread_mutex_unlock(& table->mutex); - pthread_cond_timedwait(wait_for->cond, wait_for->mutex, &timeout); + /* now really wait */ + i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout); pthread_mutex_unlock(wait_for->mutex); + + if (i == ETIMEDOUT || i == ETIME) + { + /* we rely on the caller to rollback and release all locks */ + res= LOCK_TIMEOUT; + goto ret2; + } + pthread_mutex_lock(& table->mutex); /* ... and repeat from the beginning */ @@ -384,6 +460,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, remove_from_wait_queue(new, table); lo->waiting_lock= 0; lo->waiting_for= 0; + lo->waiting_for_loid= 0; } /* add it to the list of all locks of this lock owner */ @@ -396,20 +473,20 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, new->next->prev= new; table->active_locks[new_lock-1]= new; - /* remove the old lock from the hash, if upgrading */ + /* update the latest_locks hash */ if (old) - { - new->upgraded_from= old; - hash_delete(& table->active, (byte *)old); - } - else - new->upgraded_from= 0; + hash_delete(& table->latest_locks, (byte *)old); + hash_insert(& table->latest_locks, (byte *)new); - /* and add a new lock to the hash, voila */ - hash_insert(& table->active, (byte *)new); + new->upgraded_from= old; + res= getlock_result[lock][lock]; + +ret: pthread_mutex_unlock(& table->mutex); - return getlock_result[lock][lock]; +ret2: + DBUG_ASSERT(res); + return res; } /* @@ -443,6 +520,17 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) Signal our blocker to release this next lock (after we removed our lock from the wait queue, of course). */ + /* + An example to clarify the above: + trn1> S-lock the table. Granted. + trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1 + trn3> IS-lock the table. The queue is not empty, so IS-lock is added + to the queue. It's compatible with the waiting IX-lock, so trn3 + waits for trn2->waiting_for, that is trn1. + if trn1 releases the lock it signals trn1->cond and both waiting + transactions are awaken. But if trn2 times out, trn3 must be notified + too (as IS and S locks are compatible). So trn2 must signal trn1->cond. + */ if (lock->prev && lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type]) { @@ -451,6 +539,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) pthread_mutex_unlock(lo->waiting_for->mutex); } lo->waiting_for= 0; + lo->waiting_for_loid= 0; pthread_mutex_unlock(& lock->table->mutex); lock->next= local_pool; @@ -465,11 +554,12 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) pthread_mutex_t *mutex= & lock->table->mutex; DBUG_ASSERT(cur->loid == lo->loid); + DBUG_ASSERT(lock != lock->next_in_lo); lock= lock->next_in_lo; /* TODO ? group locks by table to reduce the number of mutex locks */ pthread_mutex_lock(mutex); - hash_delete(& cur->table->active, (byte *)cur); + hash_delete(& cur->table->latest_locks, (byte *)cur); if (cur->prev) cur->prev->next= cur->next; @@ -506,7 +596,8 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) lm->pool= 0; lm->loid_to_tlo= func; lm->lock_timeout= timeout; - pthread_mutex_init(&lm->pool_mutex, MY_MUTEX_INIT_FAST); + pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST); + my_getsystime(); /* ensure that my_getsystime() is initialized */ } void tablockman_destroy(TABLOCKMAN *lm) @@ -517,36 +608,54 @@ void tablockman_destroy(TABLOCKMAN *lm) lm->pool= tmp->next; my_free((void *)tmp, MYF(0)); } - pthread_mutex_destroy(&lm->pool_mutex); + pthread_mutex_destroy(& lm->pool_mutex); } +/* + initialize a LOCKED_TABLE structure + + SYNOPSYS + lt a LOCKED_TABLE to initialize + initial_hash_size initial size for 'latest_locks' hash +*/ void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size) { - TABLE_LOCK *unused; bzero(lt, sizeof(*lt)); pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); - hash_init(& lt->active, &my_charset_bin, initial_hash_size, - offsetof(TABLE_LOCK, loid), sizeof(unused->loid), 0, 0, 0); + hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size, + offsetof(TABLE_LOCK, loid), + sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0); } void tablockman_destroy_locked_table(LOCKED_TABLE *lt) { - hash_free(& lt->active); + int i; + + DBUG_ASSERT(lt->wait_queue_out == 0); + DBUG_ASSERT(lt->wait_queue_in == 0); + DBUG_ASSERT(lt->latest_locks.records == 0); + for (i= 0; iactive_locks[i] == 0); + + hash_free(& lt->latest_locks); pthread_mutex_destroy(& lt->mutex); } #ifdef EXTRA_DEBUG -static char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", +static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX"}; -void print_tlo(TABLE_LOCK_OWNER *lo) +void tablockman_print_tlo(TABLE_LOCK_OWNER *lo) { TABLE_LOCK *lock; + printf("lo%d>", lo->loid); if ((lock= lo->waiting_lock)) - printf(" (%s.%p)", lock2str[lock->lock_type], lock->table); - for (lock= lo->active_locks; lock && lock != lock->next_in_lo; lock= lock->next_in_lo) - printf(" %s.%p", lock2str[lock->lock_type], lock->table); + printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table); + for (lock= lo->active_locks; + lock && lock != lock->next_in_lo; + lock= lock->next_in_lo) + printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table); if (lock && lock == lock->next_in_lo) printf("!"); printf("\n"); diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h index d307b2c844f..4498e7027b4 100644 --- a/storage/maria/tablockman.h +++ b/storage/maria/tablockman.h @@ -33,45 +33,45 @@ LSIX - Loose Shared + Intention eXclusive */ #ifndef _lockman_h -enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; +#warning TODO remove N-locks +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; enum lockman_getlock_result { - DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE }; - #endif -#define LOCK_TYPES LSIX +#define LOCK_TYPES (LOCK_TYPE_LAST-1) -typedef struct st_table_lock_owner TABLE_LOCK_OWNER; typedef struct st_table_lock TABLE_LOCK; -typedef struct st_locked_table LOCKED_TABLE; + +typedef struct st_table_lock_owner { + TABLE_LOCK *active_locks; /* list of active locks */ + TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */ + struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */ + pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */ +} TABLE_LOCK_OWNER; + +typedef struct st_locked_table { + pthread_mutex_t mutex; /* mutex for everything below */ + HASH latest_locks; /* latest locks in a hash */ + TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */ + TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/ +} LOCKED_TABLE; + typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); typedef struct { pthread_mutex_t pool_mutex; - TABLE_LOCK *pool; /* lifo pool of free locks */ - uint lock_timeout; - loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ + TABLE_LOCK *pool; /* lifo pool of free locks */ + uint lock_timeout; /* lock timeout in milliseconds */ + loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ } TABLOCKMAN; -struct st_table_lock_owner { - TABLE_LOCK *active_locks; /* list of active locks */ - TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */ - TABLE_LOCK_OWNER *waiting_for; /* transaction we're wating for */ - pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */ - pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ - uint16 loid; /* Lock Owner IDentifier */ -}; - -struct st_locked_table { - pthread_mutex_t mutex; /* mutex for everything below */ - HASH active; /* active locks ina hash */ - TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */ - TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque */ -}; - void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); void tablockman_destroy(TABLOCKMAN *); enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, @@ -81,7 +81,7 @@ void tablockman_init_locked_table(LOCKED_TABLE *, int); void tablockman_destroy_locked_table(LOCKED_TABLE *); #ifdef EXTRA_DEBUG -void print_tlo(TABLE_LOCK_OWNER *); +void tablockman_print_tlo(TABLE_LOCK_OWNER *); #endif #endif diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 1c5281a3449..1df4c67b4aa 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -69,7 +69,8 @@ static TRN *short_trid_to_TRN(uint16 short_trid) return (TRN *)trn; } -static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused) +static byte *trn_get_hash_key(const byte *trn, uint* len, + my_bool unused __attribute__ ((unused))) { *len= sizeof(TrID); return (byte *) & ((*((TRN **)trn))->trid); diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index 94b034bdaad..df8be054ba3 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -14,6 +14,10 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + lockman for row and table locks +*/ + //#define EXTRA_VERBOSE #include diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c index ab4b7beba9f..cf4791067dc 100644 --- a/storage/maria/unittest/lockman1-t.c +++ b/storage/maria/unittest/lockman1-t.c @@ -14,6 +14,10 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + lockman for row locks, tablockman for table locks +*/ + //#define EXTRA_VERBOSE #include @@ -64,7 +68,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid) #define lock_ok_l(O, R, L) \ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) #define lock_conflict(O, R, L) \ - test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); void test_tablockman_simple() { @@ -164,8 +168,11 @@ int Ntables= 10; int table_lock_ratio= 10; enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; -char *res2str[4]= { +char *res2str[]= { "DIDN'T GET THE LOCK", + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", "GOT THE LOCK", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; @@ -191,7 +198,7 @@ pthread_handler_t test_lockman(void *arg) res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, lock2str[locklevel], res2str[res])); - if (res == DIDNT_GET_THE_LOCK) + if (res < GOT_THE_LOCK) { lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); DIAG(("loid %2d, release all locks", loid)); @@ -208,11 +215,6 @@ pthread_handler_t test_lockman(void *arg) lock2str[locklevel+4], res2str[res])); switch (res) { - case DIDNT_GET_THE_LOCK: - lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); - DIAG(("loid %2d, release all locks", loid)); - timeout++; - continue; case GOT_THE_LOCK: continue; case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: @@ -232,7 +234,10 @@ pthread_handler_t test_lockman(void *arg) DBUG_ASSERT(res == GOT_THE_LOCK); continue; default: - DBUG_ASSERT(0); + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; } } } diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c index c515fc7ecd7..18c3072b241 100644 --- a/storage/maria/unittest/lockman2-t.c +++ b/storage/maria/unittest/lockman2-t.c @@ -14,6 +14,10 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + tablockman for row and table locks +*/ + //#define EXTRA_VERBOSE #include @@ -57,7 +61,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid) #define lock_ok_l(O, R, L) \ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) #define lock_conflict(O, R, L) \ - test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); void test_tablockman_simple() { @@ -165,14 +169,34 @@ void run_test(const char *test, pthread_handler handler, int n, int m) my_free((void*)threads, MYF(0)); } +static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK_OWNER backup= *lo; + + tablockman_release_locks(lm, lo); + /* + pthread_mutex_destroy(lo->mutex); + pthread_cond_destroy(lo->cond); + bzero(lo, sizeof(*lo)); + + lo->mutex= backup.mutex; + lo->cond= backup.cond; + lo->loid= backup.loid; + pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(lo->cond, 0);*/ +} + pthread_mutex_t rt_mutex; int Nrows= 100; int Ntables= 10; int table_lock_ratio= 10; enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; -char *res2str[4]= { - "DIDN'T GET THE LOCK", +char *res2str[]= { + 0, + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", "GOT THE LOCK", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; @@ -200,9 +224,9 @@ pthread_handler_t test_lockman(void *arg) res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, lock2str[locklevel], res2str[res])); - if (res == DIDNT_GET_THE_LOCK) + if (res < GOT_THE_LOCK) { - tablockman_release_locks(&tablockman, lo1); + reinit_tlo(&tablockman, lo1); DIAG(("loid %2d, release all locks", loid)); timeout++; continue; @@ -217,11 +241,6 @@ pthread_handler_t test_lockman(void *arg) lock2str[locklevel+4], res2str[res])); switch (res) { - case DIDNT_GET_THE_LOCK: - tablockman_release_locks(&tablockman, lo1); - DIAG(("loid %2d, release all locks", loid)); - timeout++; - continue; case GOT_THE_LOCK: continue; case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: @@ -230,9 +249,9 @@ pthread_handler_t test_lockman(void *arg) res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]); DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, lock2str[locklevel], res2str[res])); - if (res == DIDNT_GET_THE_LOCK) + if (res < GOT_THE_LOCK) { - tablockman_release_locks(&tablockman, lo1); + reinit_tlo(&tablockman, lo1); DIAG(("loid %2d, release all locks", loid)); timeout++; continue; @@ -240,12 +259,15 @@ pthread_handler_t test_lockman(void *arg) DBUG_ASSERT(res == GOT_THE_LOCK); continue; default: - DBUG_ASSERT(0); + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; } } } - tablockman_release_locks(&tablockman, lo1); + reinit_tlo(&tablockman, lo1); pthread_mutex_lock(&rt_mutex); rt_num_threads--; @@ -264,7 +286,7 @@ int main() my_init(); pthread_mutex_init(&rt_mutex, 0); - plan(39); + plan(40); if (my_atomic_initialize()) return exit_status(); @@ -299,7 +321,7 @@ int main() Nrows= 100; Ntables= 10; table_lock_ratio= 10; - //run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); #if 0 /* "real-life" simulation - many rows, no table locks */ Nrows= 1000000; From 6691ce30ecd82baf9877242c078a5e1d12a78369 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 22:53:53 +0100 Subject: [PATCH 60/95] Maria - WL#3134 Key cache to Page cache conversion Moving the test_pagecache_ tests from mysys to unittest/mysys. Means fixing includes to work with the new directory, some Makefile.am editing, replacing memset() with bfill(). test_page_cache_*.c renamed to mf_pagecache_*-t.c (-t is the standard suffix for tests in the mytap protocol). Also added plan() and exit_status() calls to tests. Sanja, I put some TODOs for you at the start of mf_pagecache_*.c unittest/mysys/test_file.h: Rename: mysys/test_file.h -> unittest/mysys/test_file.h mysys/Makefile.am: pagecache test files move to top/unittest/mysys mysys/mf_pagecache.c: my_bit.h needed to compile. unittest/mysys/Makefile.am: INCLUDES is a better place for includes than AM_CPPFLAGS (the latter get overriden by prog_CPPFLAGS, which is not desirable here). Adding pagecache's test programs (moved from mysys); test_pagecache_* has been renamed to mf_pagecache*-t (-t is the required suffix for test executables in the mytap framework). unittest/mysys/mf_pagecache_consist.c: fixing includes to work with the new directory. The test must return an exit code informing if any part failed. TODOs for Sanja. unittest/mysys/mf_pagecache_single.c: fixing includes to work with new directory. adding a plan() to account for the number of tests. Adding exit_status() to tell how many tests failed. memset() was giving a compilation warning (implicit declaration etc), properly due to me removing stdio.h etc, so I replaced it with bfill(). TODOs for Sanja. unittest/mysys/test_file.c: moved from mysys (piece of the page cache tests) and includes fixed. --- mysys/Makefile.am | 55 ------------------- mysys/mf_pagecache.c | 2 + unittest/mysys/Makefile.am | 46 +++++++++++++++- .../mysys/mf_pagecache_consist.c | 24 ++++---- .../mysys/mf_pagecache_single.c | 42 +++++++------- {mysys => unittest/mysys}/test_file.c | 8 +-- {mysys => unittest/mysys}/test_file.h | 0 7 files changed, 79 insertions(+), 98 deletions(-) rename mysys/test_pagecache_consist.c => unittest/mysys/mf_pagecache_consist.c (97%) rename mysys/test_pagecache_single.c => unittest/mysys/mf_pagecache_single.c (95%) rename {mysys => unittest/mysys}/test_file.c (93%) rename {mysys => unittest/mysys}/test_file.h (100%) diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 2f8b34dc301..4d9570febbd 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -130,61 +130,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c -test_mf_pagecache.o: mf_pagecache.c ../include/pagecache.h $(LIBRARIES) - $(CP) $(srcdir)/mf_pagecache.c test_mf_pagecache.c - $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_mf_pagecache.c - -test_file.o: test_file.c test_file.h - $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_file.c - -test_pagecache_single1k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single8k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=8192 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single64k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single: test_pagecache_single1k$(EXEEXT) test_pagecache_single8k$(EXEEXT) test_pagecache_single64k$(EXEEXT) - ./test_pagecache_single64k$(EXEEXT) - ./test_pagecache_single8k$(EXEEXT) - ./test_pagecache_single1k$(EXEEXT) - -test_pagecache_consist1k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist: test_pagecache_consist1k$(EXEEXT) test_pagecache_consist64k$(EXEEXT) test_pagecache_consist1kHC$(EXEEXT) test_pagecache_consist64kHC$(EXEEXT) test_pagecache_consist1kRD$(EXEEXT) test_pagecache_consist64kRD$(EXEEXT) test_pagecache_consist1kWR$(EXEEXT) test_pagecache_consist64kWR$(EXEEXT) - ./test_pagecache_consist1k$(EXEEXT) - ./test_pagecache_consist64k$(EXEEXT) - ./test_pagecache_consist1kHC$(EXEEXT) - ./test_pagecache_consist64kHC$(EXEEXT) - ./test_pagecache_consist1kRD$(EXEEXT) - ./test_pagecache_consist64kRD$(EXEEXT) - ./test_pagecache_consist1kWR$(EXEEXT) - ./test_pagecache_consist64kWR$(EXEEXT) - # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index e1ccab9e06f..d8e235bc7f4 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -45,6 +45,8 @@ #include #include #include +#include + /* Some compilation flags have been added specifically for this module diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index 8e6255b9e68..9b230272329 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -1,11 +1,51 @@ +INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap -AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include -AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a -noinst_PROGRAMS = bitmap-t base64-t my_atomic-t +noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \ + mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ + mf_pagecache_single_64k-t \ + mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \ + mf_pagecache_consist_1kHC-t mf_pagecache_consist_64kHC-t \ + mf_pagecache_consist_1kRD-t mf_pagecache_consist_64kRD-t \ + mf_pagecache_consist_1kWR-t mf_pagecache_consist_64kWR-t +# tests for mysys/mf_pagecache.c + +mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN + +mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 +mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY + +mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS + +mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS + +CLEANFILES = my_pagecache_debug.log page_cache_test_file_1 diff --git a/mysys/test_pagecache_consist.c b/unittest/mysys/mf_pagecache_consist.c similarity index 97% rename from mysys/test_pagecache_consist.c rename to unittest/mysys/mf_pagecache_consist.c index 86698fbb860..f7724222a09 100755 --- a/mysys/test_pagecache_consist.c +++ b/unittest/mysys/mf_pagecache_consist.c @@ -1,16 +1,14 @@ -#include "mysys_priv.h" -#include "../include/my_pthread.h" -#include "../include/pagecache.h" -#include -#include "my_dir.h" -#include -#include -#include -#include -#include "../unittest/mytap/tap.h" -#include +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). Use ok() and plan(). +*/ + +#include +#include +#include +#include "test_file.h" -/*#define PAGE_SIZE 65536*/ #define PCACHE_SIZE (PAGE_SIZE*1024*8) #ifndef DBUG_OFF @@ -443,5 +441,5 @@ int main(int argc, char **argv __attribute__((unused))) DBUG_PRINT("info", ("Program end")); - DBUG_RETURN(0); + DBUG_RETURN(exit_status()); } diff --git a/mysys/test_pagecache_single.c b/unittest/mysys/mf_pagecache_single.c similarity index 95% rename from mysys/test_pagecache_single.c rename to unittest/mysys/mf_pagecache_single.c index 9df7844cfa5..49ecd2986ab 100644 --- a/mysys/test_pagecache_single.c +++ b/unittest/mysys/mf_pagecache_single.c @@ -1,16 +1,13 @@ -#include "mysys_priv.h" -#include "../include/my_pthread.h" -#include "../include/pagecache.h" -#include "my_dir.h" -#include -#include -#include -#include -#include -#include "../unittest/mytap/tap.h" +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). +*/ +#include +#include +#include #include "test_file.h" -/* #define PAGE_SIZE 1024 */ #define PCACHE_SIZE (PAGE_SIZE*1024*10) #ifndef DBUG_OFF @@ -100,7 +97,7 @@ int simple_read_write_test() unsigned char *buffr= malloc(PAGE_SIZE); int res; DBUG_ENTER("simple_read_write_test"); - memset(buffw, '\1', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\1'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -136,7 +133,7 @@ int simple_read_change_write_read_test() int res; DBUG_ENTER("simple_read_change_write_read_test"); /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\1'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -149,7 +146,7 @@ int simple_read_change_write_read_test() PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, 0); - memset(buffw, '\65', PAGE_SIZE/2); + bfill(buffw, PAGE_SIZE/2, '\65'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE_UNLOCK, @@ -190,7 +187,7 @@ int simple_pin_test() int res; DBUG_ENTER("simple_pin_test"); /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\1'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -213,7 +210,7 @@ int simple_pin_test() PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DELAY, 0); - memset(buffw + PAGE_SIZE/2, ((unsigned char) 129), PAGE_SIZE/2); + bfill(buffw + PAGE_SIZE/2, PAGE_SIZE/2, ((unsigned char) 129)); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE_TO_READ, @@ -268,7 +265,7 @@ int simple_delete_forget_test() int res; DBUG_ENTER("simple_delete_forget_test"); /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\1'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -277,7 +274,7 @@ int simple_delete_forget_test() 0); flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); /* test */ - memset(buffw, '\2', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\2'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -310,7 +307,7 @@ int simple_delete_flush_test() int res; DBUG_ENTER("simple_delete_flush_test"); /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\1'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, @@ -319,7 +316,7 @@ int simple_delete_flush_test() 0); flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); /* test */ - memset(buffw, '\2', PAGE_SIZE); + bfill(buffw, PAGE_SIZE, '\2'); pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_WRITELOCKED, @@ -356,7 +353,7 @@ int simple_big_test() /* prepare the file twice larger then cache */ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) { - memset(buffw, (unsigned char) (i & 0xff), PAGE_SIZE); + bfill(buffw, PAGE_SIZE, (unsigned char) (i & 0xff)); desc[i].length= PAGE_SIZE; desc[i].content= (i & 0xff); pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, @@ -528,6 +525,7 @@ int main(int argc, char **argv __attribute__((unused))) my_thread_global_init(); + plan(12); if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, PAGE_SIZE, 0)) == 0) @@ -585,5 +583,5 @@ int main(int argc, char **argv __attribute__((unused))) DBUG_PRINT("info", ("Program end")); - DBUG_RETURN(0); + DBUG_RETURN(exit_status()); } diff --git a/mysys/test_file.c b/unittest/mysys/test_file.c similarity index 93% rename from mysys/test_file.c rename to unittest/mysys/test_file.c index 7ffca48023d..758d0bfa81b 100644 --- a/mysys/test_file.c +++ b/unittest/mysys/test_file.c @@ -1,8 +1,6 @@ -#include "mysys_priv.h" -#include "my_dir.h" -#include -#include -#include +#include +#include +#include #include "test_file.h" diff --git a/mysys/test_file.h b/unittest/mysys/test_file.h similarity index 100% rename from mysys/test_file.h rename to unittest/mysys/test_file.h From 3becab22e9dd774d58983e553b6fbbfb9960f852 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 23:04:53 +0100 Subject: [PATCH 61/95] Maria: fix for compiler warning (signed-ness). #warning to warn (!) about current issues in the pagecache hindering checkpoint (we will have to fix them!). mysys/mf_pagecache.c: fix for compiler warning (signed-ness). #warning to warn (!) about current issues in the pagecache hindering checkpoint (we will have to fix them!). --- mysys/mf_pagecache.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index d8e235bc7f4..ab27e056c37 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -699,7 +699,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->shift= my_bit_log2(block_size); DBUG_PRINT("info", ("block_size: %u", block_size)); - DBUG_ASSERT((1 << pagecache->shift) == block_size); + DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + 2 * sizeof(PAGECACHE_HASH_LINK) + @@ -3259,6 +3259,7 @@ restart: #ifndef DBUG_OFF int rc= #endif +#warning we are doing an unlock here, so need to give the page its rec_lsn! pagecache_make_lock_and_pin(pagecache, block, write_lock_change_table[lock].unlock_lock, write_pin_change_table[pin].unlock_pin); @@ -3614,6 +3615,11 @@ restart: else { /* Link the block into a list of blocks 'in switch' */ +#warning this unlink_changed() is a serious problem for Maria's Checkpoint: it \ +removes a page from the list of dirty pages, while it's still dirty. A \ + solution is to abandon first_in_switch, just wait for this page to be \ + flushed by somebody else, and loop. TODO: check all places where we remove a \ + page from the list of dirty pages unlink_changed(block); link_changed(block, &first_in_switch); } From 6ea0c5c7d6dcddbdade4e7f41273c09284b739b1 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Nov 2006 14:41:33 +0200 Subject: [PATCH 62/95] Post review changes. mysys/mf_pagecache.c: Unneed volitile definitions removed. Comments added. info_check_lock() code simplified. pagecache_delete_page() fixed. --- mysys/mf_pagecache.c | 144 +++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 81 deletions(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index ab27e056c37..4b92f68d9bf 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -127,11 +127,10 @@ my_bool my_disable_flush_pagecache_blocks= 0; (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) /* types of condition variables */ -#define COND_FOR_REQUESTED 0 -#define COND_FOR_SAVED 1 -#define COND_FOR_WRLOCK 2 -#define COND_FOR_COPY 3 -#define COND_SIZE 4 +#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ +#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ +#define COND_FOR_WRLOCK 2 /* queue of write lock */ +#define COND_SIZE 3 /* number of COND_* queues */ typedef pthread_cond_t KEYCACHE_CONDVAR; @@ -296,8 +295,8 @@ struct st_pagecache_block_link wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ uint requests; /* number of requests for the block */ byte *buffer; /* buffer for the block page */ - volatile uint status; /* state of the block */ - volatile uint pins; /* pin counter */ + uint status; /* state of the block */ + uint pins; /* pin counter */ #ifdef PAGECACHE_DEBUG PAGECACHE_PIN_INFO *pin_list; PAGECACHE_LOCK_INFO *lock_list; @@ -354,6 +353,20 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, DBUG_RETURN(0); } + +/* + Debug function which checks current lock/pin state and requested changes + + SYNOPSIS + info_check_lock() + lock requested lock changes + pin requested pin changes + + RETURN + 0 - OK + 1 - Error +*/ + my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin) @@ -368,90 +381,58 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, case PAGECACHE_LOCK_LEFT_UNLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); if (info) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->U", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_LEFT_READLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->R", - (ulong)thread, (ulong)block, (info?'W':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_LEFT_WRITELOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->W", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_READ: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_PIN); if (info != 0) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->R", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE: DBUG_ASSERT(pin == PAGECACHE_PIN); if (info != 0) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->W", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; - case PAGECACHE_LOCK_READ_UNLOCK: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN); if (info == 0 || info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->U", - (ulong)thread, (ulong)block, (info?'W':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE_UNLOCK: DBUG_ASSERT(pin == PAGECACHE_UNPIN); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE_TO_READ: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED || pin == PAGECACHE_UNPIN); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; } DBUG_RETURN(0); +error: + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d," + "to lock: %s, to pin: %s", + (ulong)thread, (ulong)block, test(info), + (info ? info->write_lock : 0), + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_RETURN(1); } #endif @@ -1629,7 +1610,8 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) /* - Get the hash link for the page if it is in the cache + Get the hash link for the page if it is in the cache (do not put the + page in the cache if it is absent there) SYNOPSIS get_present_hash_link() @@ -2982,6 +2964,7 @@ restart: if (!link) { DBUG_PRINT("info", ("There is no such page in the cache")); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(0); } block= link->block; @@ -2998,32 +2981,31 @@ restart: if (block->status & BLOCK_CHANGED && flush) { - if (flush) + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) { - /* The block contains a dirty page - push it out of the cache */ - - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 1); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - if (error) - { - block->status|= BLOCK_ERROR; - goto err; - } + block->status|= BLOCK_ERROR; + goto err; } + pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* From 4b10971a3a887c176e3e71d092dd4cf4b4f68f3b Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 19 Nov 2006 21:32:16 +0100 Subject: [PATCH 63/95] post-review fixes --- storage/maria/tablockman.c | 73 ++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c index b4cf77401bb..d8dffa09a5e 100644 --- a/storage/maria/tablockman.c +++ b/storage/maria/tablockman.c @@ -229,7 +229,7 @@ struct st_table_lock { #define hash_insert my_hash_insert /* for consistency :) */ static inline -TABLE_LOCK *find_loid(LOCKED_TABLE *table, uint16 loid) +TABLE_LOCK *find_by_loid(LOCKED_TABLE *table, uint16 loid) { return (TABLE_LOCK *)hash_search(& table->latest_locks, (byte *)& loid, sizeof(loid)); @@ -286,7 +286,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, pthread_mutex_lock(& table->mutex); /* do we already have a lock on this resource ? */ - old= find_loid(table, lo->loid); + old= find_by_loid(table, lo->loid); /* calculate the level of the upgraded lock, if yes */ new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; @@ -340,37 +340,50 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, { /* found! */ wait_for= tmp; + break; } - else + + /* + hmm, the lock before doesn't block us, let's look one step further. + the condition below means: + + if we never waited on a condition yet + OR + the lock before ours (blocker) waits on a lock (blocker2) that is + present in the hash AND and conflicts with 'blocker' + + the condition after OR may fail if 'blocker2' was removed from + the hash, its signal woke us up, but 'blocker' itself didn't see + the signal yet. + */ + if (!lo->waiting_lock || + ((blocker2= find_by_loid(table, tmp->waiting_for_loid)) && + !lock_compatibility_matrix[blocker2->lock_type] + [blocker->lock_type])) { + /* but it's waiting for a real lock. we'll wait for the same lock */ + wait_for= tmp->waiting_for; /* - hmm, the lock before doesn't block us, let's look one step further. - the condition below means: - - if we never waited on a condition yet - OR - the lock before ours (blocker) waits on a lock (blocker2) that is - present in the hash AND and conflicts with 'blocker' - - the condition after OR may fail if 'blocker2' was removed from - the hash, its signal woke us up, but 'blocker' itself didn't see - the signal yet. - */ - if (!lo->waiting_lock || - ((blocker2= find_loid(table, tmp->waiting_for_loid)) && - !lock_compatibility_matrix[blocker2->lock_type] - [blocker->lock_type])) - { - /* but it's waiting for a real lock. we'll wait for the same lock */ - wait_for= tmp->waiting_for; - } - /* - otherwise - a lock it's waiting for doesn't exist. - We've no choice but to scan the wait queue backwards, looking - for a conflicting lock or a lock waiting for a real lock. - QQ is there a way to avoid this scanning ? + We don't really need tmp->waiting_for, as tmp->waiting_for_loid + is enough. waiting_for is just a local cache to avoid calling + loid_to_tlo(). + But it's essensial that tmp->waiting_for pointer can ONLY + be dereferenced if find_by_loid() above returns a non-null + pointer, because a TABLE_LOCK_OWNER object that it points to + may've been freed when we come here after a signal. + In particular tmp->waiting_for_loid cannot be replaced + with tmp->waiting_for->loid. */ + DBUG_ASSERT(wait_for == lm->loid_to_tlo(tmp->waiting_for_loid)); + break; } + + /* + otherwise - a lock it's waiting for doesn't exist. + We've no choice but to scan the wait queue backwards, looking + for a conflicting lock or a lock waiting for a real lock. + QQ is there a way to avoid this scanning ? + */ } } @@ -531,8 +544,8 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) transactions are awaken. But if trn2 times out, trn3 must be notified too (as IS and S locks are compatible). So trn2 must signal trn1->cond. */ - if (lock->prev && - lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type]) + if (lock->next && + lock_compatibility_matrix[lock->next->lock_type][lock->lock_type]) { pthread_mutex_lock(lo->waiting_for->mutex); pthread_cond_broadcast(lo->waiting_for->cond); From a41ac15b960aee306e3464835b05a835fd98771d Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 21 Nov 2006 22:22:59 +0100 Subject: [PATCH 64/95] Maria - various fixes around durability of files: 1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the man page: "[With fsync()] the disk drive may also re-order the data so that later writes may be present while earlier writes are not. Applications such as databases that require a strict ordering of writes should use F_FULLFSYNC to ensure their data is written in the order they expect"). I have seen two other pieces of software changing from fsync to F_FULLFSYNC on Mac OS X. 2) to make a file creation/deletion/renaming durable on Linux (at least ext2 as I have tested) (see "man fsync"), a fsync() on the directory is needed: new functions to do that, and a flag MY_SYNC_DIR to do it in my_create/my_delete/my_rename. 3) now using this directory syncing when creating he frm if opt_sync_frm, and for Maria's control file when it is created. include/my_sys.h: new flag to my_create/my_delete/my_rename, which asks to sync the directory after the operation is done (currently does nothing except on Linux) libmysql/CMakeLists.txt: my_create() now depends on my_sync() so my_sync is needed for libmysql libmysql/Makefile.shared: my_create() now depends on my_sync() so my_sync is needed for libmysql mysys/my_create.c: my_create() can now sync the directory if asked for mysys/my_delete.c: my_delete() can now sync the directory if asked for mysys/my_open.c: it was a bug that my_close() is done on fd but a positive fd would still be returned, by my_register_filename(). mysys/my_rename.c: my_rename() can now sync the two directories (the one of "from" and the one of "to") if asked for. mysys/my_sync.c: On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync() (see "man fsync" on Mac OS X 10.3). my_sync_dir(): to sync a directory after a file creation/deletion/ renaming; can be called directly or via MY_SYNC_DIR in my_create/ my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux). my_sync_dir_from_file(): same as above, just more practical when the caller has a file name but no directory name ready. Should the #warning even be a #error? I mean do we want to release binaries which don't guarantee any durability? sql/log.cc: a TODO for the future. sql/unireg.cc: If we sync the frm it makes sense to also sync its creation in the directory. storage/maria/ma_control_file.c: control file is vital, try to make it to disk --- include/my_sys.h | 3 ++ libmysql/CMakeLists.txt | 1 + libmysql/Makefile.shared | 2 +- mysys/my_create.c | 3 ++ mysys/my_delete.c | 2 + mysys/my_open.c | 1 + mysys/my_rename.c | 5 +++ mysys/my_sync.c | 74 +++++++++++++++++++++++++++++++++ sql/log.cc | 5 +++ sql/unireg.cc | 9 ++-- storage/maria/ma_control_file.c | 11 ++--- 11 files changed, 104 insertions(+), 12 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index fc83c583201..3c5b5e6cbf4 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -55,6 +55,7 @@ extern int NEAR my_errno; /* Last error in mysys */ #define MY_WME 16 /* Write message on error */ #define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */ #define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */ +#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */ #define MY_RAID 64 /* Support for RAID */ #define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */ #define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */ @@ -622,6 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern int my_fclose(FILE *fd,myf MyFlags); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_sync(File fd, myf my_flags); +extern void my_sync_dir(const char *dir_name, myf my_flags); +extern void my_sync_dir_by_file(const char *file_name, myf my_flags); extern int my_error _VARARGS((int nr,myf MyFlags, ...)); extern int my_printf_error _VARARGS((uint my_err, const char *format, myf MyFlags, ...)) diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index d12b6ca6c10..3c9ed53a866 100644 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -37,6 +37,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def ../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c ../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c + ../mysys/my_sync.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index c2d98a81042..6326e255559 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_iocache2.lo my_seek.lo my_sleep.lo \ my_pread.lo mf_cache.lo md5.lo sha1.lo \ my_getopt.lo my_gethostbyname.lo my_port.lo \ - my_rename.lo my_chsize.lo + my_rename.lo my_chsize.lo my_sync.lo sqlobjects = net.lo sql_cmn_objects = pack.lo client.lo my_time.lo diff --git a/mysys/my_create.c b/mysys/my_create.c index e1e32b50842..bb3801691a5 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0)) + my_sync_dir_by_file(FileName, MyFlags); + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index de2a9814a56..6d90caa48ed 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if (MyFlags & MY_SYNC_DIR) + my_sync_dir_by_file(name, MyFlags); DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_open.c b/mysys/my_open.c index ab2f7c9ff27..344e9c0a43b 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 9c27238cc72..2c9ace6223a 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { + my_sync_dir_by_file(from, MyFlags); + my_sync_dir_by_file(to, MyFlags); + } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index c557324b52c..eaa26ef07a7 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* Recent Mac OS X versions insist this call is safer than fsync() */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some fs don't support F_FULLFSYNC and fail above, fallback: */ +#endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); #elif defined(HAVE_FSYNC) @@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else +#warning Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags) DBUG_RETURN(res); } /* my_sync */ + +/* + Force directory information to disk. Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); + File dir_fd; + int error= 0; + /* + Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) : + ignore errors. But print them to the debug log. + */ + if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)) + { + if (my_sync(dir_fd, MYF(0))) + { + error= errno; + DBUG_PRINT("info",("my_sync failed errno: %d", error)); + } + my_close(dir_fd, MYF(0)); + } + else + { + error= errno; + DBUG_PRINT("info",("my_open failed errno: %d", error)); + } + DBUG_VOID_RETURN; +#endif +} + + +/* + Force directory information to disk. Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. + + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir_by_file(const char *file_name, + myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + char dir_name[FN_REFLEN]; + dirname_part(dir_name, file_name); + return my_sync_dir(dir_name, my_flags); +#endif +} + diff --git a/sql/log.cc b/sql/log.cc index b63ec563baf..cbba6ec1ddf 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2102,6 +2102,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg, my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), 0, MYF(MY_WME | MY_WAIT_IF_FULL))) { + /* + TODO: all operations creating/deleting the index file or a log, should + call my_sync_dir() or my_sync_dir_by_file() to be durable. + TODO: file creation should be done with my_create() not my_open(). + */ if (index_file_nr >= 0) my_close(index_file_nr,MYF(0)); return TRUE; diff --git a/sql/unireg.cc b/sql/unireg.cc index 2ea572c782c..5c2997c1483 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -285,9 +285,12 @@ bool mysql_create_frm(THD *thd, const char *file_name, my_free((gptr) screen_buff,MYF(0)); my_free((gptr) keybuff, MYF(0)); - if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) && - my_sync(file, MYF(MY_WME))) - goto err2; + if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + { + if (my_sync(file, MYF(MY_WME))) + goto err2; + my_sync_dir_by_file(file_name, MYF(0)); + } if (my_close(file,MYF(MY_WME))) goto err3; diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5090fac4182..47583466cd7 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -134,16 +134,11 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() if (create_file) { - if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0) + if ((control_file_fd= my_create(name, 0, + open_flags, MYF(MY_SYNC_DIR))) < 0) DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); - /* - TODO: from "man fsync" on Linux: - "fsync does not necessarily ensure that the entry in the directory - containing the file has also reached disk. For that an explicit - fsync on the file descriptor of the directory is also needed." - So if we just created the file we should sync the directory. - Maybe there should be a flag of my_create() to do this. + /* To be safer we should make sure that there are no logs or data/index files around (indeed it could be that the control file alone was deleted or not restored, and we should not go on with life at this point). From 354d4a7e06074beb0a08afab06b0f5dfd63d7d1d Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Nov 2006 15:53:26 +0100 Subject: [PATCH 65/95] shut up signedness warnings in atomic ops in gcc unittest/mysys/my_atomic-t.c: try both signed and unsigned arguments for atomic ops --- include/my_atomic.h | 139 ++++++++++++++++++++++++----------- unittest/mysys/my_atomic-t.c | 5 +- 2 files changed, 101 insertions(+), 43 deletions(-) diff --git a/include/my_atomic.h b/include/my_atomic.h index 78121c59c40..7b1b56815e0 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -61,70 +61,115 @@ #endif #ifndef make_atomic_add_body -#define make_atomic_add_body(S) \ +#define make_atomic_add_body(S) \ int ## S tmp=*a; \ while (!my_atomic_cas ## S(a, &tmp, tmp+v)); \ v=tmp; #endif +#ifdef __GNUC__ +/* + we want to be able to use my_atomic_xxx functions with + both signed and unsigned integers. But gcc will issue a warning + "passing arg N of `my_atomic_XXX' as [un]signed due to prototype" + if the signedness of the argument doesn't match the prototype, or + "pointer targets in passing argument N of my_atomic_XXX differ in signedness" + if int* is used where uint* is expected (or vice versa). + Let's shut these warnings up +*/ +#define make_transparent_unions(S) \ + typedef union { \ + int ## S i; \ + uint ## S u; \ + } U_ ## S __attribute__ ((transparent_union)); \ + typedef union { \ + int ## S volatile *i; \ + uint ## S volatile *u; \ + } Uv_ ## S __attribute__ ((transparent_union)); +#define uintptr intptr +make_transparent_unions(8) +make_transparent_unions(16) +make_transparent_unions(32) +make_transparent_unions(ptr) +#undef uintptr +#undef make_transparent_unions +#define a U_a.i +#define cmp U_cmp.i +#define v U_v.i +#define set U_set.i +#else +#define U_8 int8 +#define U_16 int16 +#define U_32 int32 +#define U_ptr intptr +#define Uv_8 int8 +#define Uv_16 int16 +#define Uv_32 int32 +#define Uv_ptr intptr +#define U_a volatile *a +#define U_cmp *cmp +#define U_v v +#define U_set set +#endif /* __GCC__ transparent_union magic */ + #ifdef HAVE_INLINE -#define make_atomic_add(S) \ -STATIC_INLINE int ## S my_atomic_add ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_add_body(S); \ - return v; \ +#define make_atomic_add(S) \ +STATIC_INLINE int ## S my_atomic_add ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_add_body(S); \ + return v; \ } -#define make_atomic_fas(S) \ -STATIC_INLINE int ## S my_atomic_fas ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_fas_body(S); \ - return v; \ +#define make_atomic_fas(S) \ +STATIC_INLINE int ## S my_atomic_fas ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_fas_body(S); \ + return v; \ } -#define make_atomic_cas(S) \ -STATIC_INLINE int my_atomic_cas ## S(int ## S volatile *a, \ - int ## S *cmp, int ## S set) \ -{ \ - int8 ret; \ - make_atomic_cas_body(S); \ - return ret; \ +#define make_atomic_cas(S) \ +STATIC_INLINE int my_atomic_cas ## S(Uv_ ## S U_a, \ + Uv_ ## S U_cmp, U_ ## S U_set) \ +{ \ + int8 ret; \ + make_atomic_cas_body(S); \ + return ret; \ } -#define make_atomic_load(S) \ -STATIC_INLINE int ## S my_atomic_load ## S(int ## S volatile *a) \ -{ \ - int ## S ret; \ - make_atomic_load_body(S); \ - return ret; \ +#define make_atomic_load(S) \ +STATIC_INLINE int ## S my_atomic_load ## S(Uv_ ## S U_a) \ +{ \ + int ## S ret; \ + make_atomic_load_body(S); \ + return ret; \ } -#define make_atomic_store(S) \ -STATIC_INLINE void my_atomic_store ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_store_body(S); \ +#define make_atomic_store(S) \ +STATIC_INLINE void my_atomic_store ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_store_body(S); \ } #else /* no inline functions */ -#define make_atomic_add(S) \ -extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v); +#define make_atomic_add(S) \ +extern int ## S my_atomic_add ## S(Uv_ ## S, U_ ## S); -#define make_atomic_fas(S) \ -extern int ## S my_atomic_fas ## S(int ## S volatile *a, int ## S v); +#define make_atomic_fas(S) \ +extern int ## S my_atomic_fas ## S(Uv_ ## S, U_ ## S); -#define make_atomic_cas(S) \ -extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set); +#define make_atomic_cas(S) \ +extern int my_atomic_cas ## S(Uv_ ## S, Uv_ ## S, U_ ## S); -#define make_atomic_load(S) \ -extern int ## S my_atomic_load ## S(int ## S volatile *a); +#define make_atomic_load(S) \ +extern int ## S my_atomic_load ## S(Uv_ ## S); -#define make_atomic_store(S) \ -extern void my_atomic_store ## S(int ## S volatile *a, int ## S v); +#define make_atomic_store(S) \ +extern void my_atomic_store ## S(Uv_ ## S, U_ ## S); #endif @@ -157,6 +202,18 @@ make_atomic_fas(ptr) #undef _atomic_h_cleanup_ #endif +#undef U_8 +#undef U_16 +#undef U_32 +#undef U_ptr +#undef a +#undef cmp +#undef v +#undef set +#undef U_a +#undef U_cmp +#undef U_v +#undef U_set #undef make_atomic_add #undef make_atomic_cas #undef make_atomic_load diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index a33e233c29a..314335944d3 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -21,7 +21,8 @@ #include #include -volatile uint32 a32,b32,c32, N; +volatile uint32 a32,b32; +volatile int32 c32, N; my_atomic_rwlock_t rwl; LF_ALLOCATOR lf_allocator; LF_HASH lf_hash; @@ -98,7 +99,7 @@ pthread_handler_t test_atomic_cas_handler(void *arg) x= (x*m+0x87654321) & INT_MAX32; do { my_atomic_rwlock_wrlock(&rwl); - ok= my_atomic_cas32(&a32, &y, y+x); + ok= my_atomic_cas32(&a32, &y, (uint32)y+x); my_atomic_rwlock_wrunlock(&rwl); } while (!ok) ; do { From adfba203ffd1bd89d74a63ff09de9b9a40fb64d7 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Nov 2006 23:38:10 +0100 Subject: [PATCH 66/95] Maria - post-review fixes about my_sync_dir(): make it return an error (except if certain errno), test this error in callers. Do a single my_sync_dir() in my_rename() if possible. include/my_global.h: better have a symbol name talking about the feature, use it in the code of the feature, and define the symbol once depending on the platform, rather than have the platform "tested" in the code of the feature several times. include/my_sys.h: my_sync_dir() now can return error mysys/my_create.c: my_sync_dir() can now return an error mysys/my_delete.c: my_sync_dir() can now return an error mysys/my_rename.c: my_sync_dir() can now return an error. Do a single sync if "from" and "to" are the same directory. #ifdef here to not even compile dirname_part() if useless. mysys/my_sync.c: more comments. A compilation error if no way to make my_sync() work (I guess we don't want to ship a binary which cannot do any sync at all; users of strange OSes compile from source and can remove the #error). my_sync_dir() now returns an error (except for certain errno values considered ok; EIO "input/output error" is not ok). sql/unireg.cc: my_sync_dir() now returns an error which must be tested --- include/my_global.h | 8 ++++++ include/my_sys.h | 4 +-- mysys/my_create.c | 8 ++++-- mysys/my_delete.c | 5 ++-- mysys/my_rename.c | 12 ++++++-- mysys/my_sync.c | 67 +++++++++++++++++++++++---------------------- sql/unireg.cc | 9 +++--- 7 files changed, 67 insertions(+), 46 deletions(-) diff --git a/include/my_global.h b/include/my_global.h index 6831a76c945..f337c079c82 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -1468,4 +1468,12 @@ do { doubleget_union _tmp; \ #define dlerror() "" #endif +/* + Only Linux is known to need an explicit sync of the directory to make sure a + file creation/deletion/renaming in(from,to) this directory durable. +*/ +#ifdef TARGET_OS_LINUX +#define NEED_EXPLICIT_SYNC_DIR 1 +#endif + #endif /* my_global_h */ diff --git a/include/my_sys.h b/include/my_sys.h index 3c5b5e6cbf4..f235c4d3fed 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -623,8 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern int my_fclose(FILE *fd,myf MyFlags); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_sync(File fd, myf my_flags); -extern void my_sync_dir(const char *dir_name, myf my_flags); -extern void my_sync_dir_by_file(const char *file_name, myf my_flags); +extern int my_sync_dir(const char *dir_name, myf my_flags); +extern int my_sync_dir_by_file(const char *file_name, myf my_flags); extern int my_error _VARARGS((int nr,myf MyFlags, ...)); extern int my_printf_error _VARARGS((uint my_err, const char *format, myf MyFlags, ...)) diff --git a/mysys/my_create.c b/mysys/my_create.c index bb3801691a5..0b1bfa12c18 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -53,8 +53,12 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif - if ((MyFlags & MY_SYNC_DIR) && (fd >=0)) - my_sync_dir_by_file(FileName, MyFlags); + if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && + my_sync_dir_by_file(FileName, MyFlags)) + { + my_close(fd, MyFlags); + fd= -1; + } DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); diff --git a/mysys/my_delete.c b/mysys/my_delete.c index 6d90caa48ed..d56507c36c0 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -30,8 +30,9 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } - else if (MyFlags & MY_SYNC_DIR) - my_sync_dir_by_file(name, MyFlags); + else if ((MyFlags & MY_SYNC_DIR) && + my_sync_dir_by_file(name, MyFlags)) + err= -1; DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 2c9ace6223a..8c2a354324b 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -63,8 +63,16 @@ int my_rename(const char *from, const char *to, myf MyFlags) } else if (MyFlags & MY_SYNC_DIR) { - my_sync_dir_by_file(from, MyFlags); - my_sync_dir_by_file(to, MyFlags); +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + dirname_part(dir_from, from); + dirname_part(dir_to, to); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; +#endif } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index eaa26ef07a7..ada2ea84414 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -50,10 +50,14 @@ int my_sync(File fd, myf my_flags) do { #if defined(F_FULLFSYNC) - /* Recent Mac OS X versions insist this call is safer than fsync() */ + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache). + */ if (!(res= fcntl(fd, F_FULLFSYNC, 0))) break; /* ok */ - /* Some fs don't support F_FULLFSYNC and fail above, fallback: */ + /* Some file systems don't support F_FULLFSYNC and fail above: */ + DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); #endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); @@ -62,7 +66,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else -#warning Cannot find a way to sync a file, durability in danger +#error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -74,7 +78,10 @@ int my_sync(File fd, myf my_flags) my_errno= -1; /* Unknown error */ if ((my_flags & MY_IGNORE_BADFD) && (er == EBADF || er == EINVAL || er == EROFS)) + { + DBUG_PRINT("info", ("ignoring errno %d", er)); res= 0; + } else if (my_flags & MY_WME) my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno); } @@ -83,68 +90,62 @@ int my_sync(File fd, myf my_flags) /* - Force directory information to disk. Only Linux is known to need this to - make sure a file creation/deletion/renaming in(from,to) this directory - durable. + Force directory information to disk. SYNOPSIS my_sync_dir() dir_name the name of the directory - my_flags unused + my_flags flags (MY_WME etc) RETURN - nothing (the sync may fail sometimes). + 0 if ok, !=0 if error */ -void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused))) +int my_sync_dir(const char *dir_name, myf my_flags) { -#ifdef TARGET_OS_LINUX +#ifdef NEED_EXPLICIT_SYNC_DIR DBUG_ENTER("my_sync_dir"); DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); File dir_fd; - int error= 0; + int res= 0; /* - Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) : - ignore errors. But print them to the debug log. + Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. + EIO on the other hand is very important. Hence MY_IGNORE_BADFD. */ - if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)) + if ((dir_fd= my_open(dir_name, O_RDONLY, MYF(my_flags))) >= 0) { - if (my_sync(dir_fd, MYF(0))) - { - error= errno; - DBUG_PRINT("info",("my_sync failed errno: %d", error)); - } - my_close(dir_fd, MYF(0)); + if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) + res= 2; + if (my_close(dir_fd, MYF(my_flags))) + res= 3; } else - { - error= errno; - DBUG_PRINT("info",("my_open failed errno: %d", error)); - } - DBUG_VOID_RETURN; + res= 1; + DBUG_RETURN(res); +#else + return 0; #endif } /* - Force directory information to disk. Only Linux is known to need this to - make sure a file creation/deletion/renaming in(from,to) this directory - durable. + Force directory information to disk. SYNOPSIS my_sync_dir_by_file() file_name the name of a file in the directory - my_flags unused + my_flags flags (MY_WME etc) RETURN - nothing (the sync may fail sometimes). + 0 if ok, !=0 if error */ -void my_sync_dir_by_file(const char *file_name, - myf my_flags __attribute__((unused))) +int my_sync_dir_by_file(const char *file_name, myf my_flags) { -#ifdef TARGET_OS_LINUX +#ifdef NEED_EXPLICIT_SYNC_DIR char dir_name[FN_REFLEN]; dirname_part(dir_name, file_name); return my_sync_dir(dir_name, my_flags); +#else + return 0; #endif } diff --git a/sql/unireg.cc b/sql/unireg.cc index 5c2997c1483..b1d0e75723e 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -285,12 +285,11 @@ bool mysql_create_frm(THD *thd, const char *file_name, my_free((gptr) screen_buff,MYF(0)); my_free((gptr) keybuff, MYF(0)); - if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) - { - if (my_sync(file, MYF(MY_WME))) + if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) && + (my_sync(file, MYF(MY_WME)) || + my_sync_dir_by_file(file_name, MYF(MY_WME)))) goto err2; - my_sync_dir_by_file(file_name, MYF(0)); - } + if (my_close(file,MYF(MY_WME))) goto err3; From de6f550ec7015fccd044a54c7628cdf8cdc2ed8c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 27 Nov 2006 22:01:29 +0100 Subject: [PATCH 67/95] WL#3072 Maria Recovery. Making DDLs durable in Maria: Sync table files after CREATE (of non-temp table), DROP, RENAME, TRUNCATE, sync directories and symlinks (for the 3 first commands). Comments for future log records. In ma_rename(), if rename of index works and then rename of data fails, try to undo the rename of the index to leave a consistent state. mysys/my_symlink.c: sync directory after creation of a symbolic link in it, if asked mysys/my_sync.c: comment. Fix for when the file's name has no directory in it. storage/maria/ma_create.c: sync files and links and dirs when creating a non-temporary table. Optimizations of the above to reduce syncs in the common cases: * if index file and data file have the exact same paths (regular and link), sync the directories (of regular and link) only once after creating the last file (the data file). * don't sync the data file if we didn't write to it (always true in our builds). storage/maria/ma_delete_all.c: sync files after truncating a table storage/maria/ma_delete_table.c: sync files and symbolic links and dirs after dropping a table storage/maria/ma_extra.c: a function which wraps the sync of the index file and the sync of the data file. storage/maria/ma_locking.c: using a wrapper function storage/maria/ma_rename.c: sync files and symbolic links and dirs after renaming a table. If rename of index works and then rename of data fails, try to undo the rename of the index to leave a consistent state. That is just a try, it may fail... storage/maria/ma_test3.c: warning to not pay attention to this test. storage/maria/maria_def.h: declaration for the function added to ma_extra.c --- mysys/my_symlink.c | 2 ++ mysys/my_sync.c | 8 ++++-- storage/maria/ma_create.c | 49 +++++++++++++++++++++++++++------ storage/maria/ma_delete_all.c | 26 +++++++++++++++++ storage/maria/ma_delete_table.c | 17 ++++++++++-- storage/maria/ma_extra.c | 11 ++++++-- storage/maria/ma_locking.c | 4 +-- storage/maria/ma_rename.c | 36 +++++++++++++++++++++--- storage/maria/ma_test3.c | 4 +++ storage/maria/maria_def.h | 1 + 10 files changed, 134 insertions(+), 24 deletions(-) diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index 7be3fcd36f0..b3d68992578 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -85,6 +85,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags) if (MyFlags & MY_WME) my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno); } + else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags)) + result= -1; DBUG_RETURN(result); #endif /* HAVE_READLINK */ } diff --git a/mysys/my_sync.c b/mysys/my_sync.c index ada2ea84414..26bee5a293f 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -52,7 +52,7 @@ int my_sync(File fd, myf my_flags) #if defined(F_FULLFSYNC) /* In Mac OS X >= 10.3 this call is safer than fsync() (it forces the - disk's cache). + disk's cache and guarantees ordered writes). */ if (!(res= fcntl(fd, F_FULLFSYNC, 0))) break; /* ok */ @@ -89,6 +89,7 @@ int my_sync(File fd, myf my_flags) } /* my_sync */ +static const char cur_dir_name[]= {FN_CURLIB, 0}; /* Force directory information to disk. @@ -107,11 +108,14 @@ int my_sync_dir(const char *dir_name, myf my_flags) DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); File dir_fd; int res= 0; + const char *correct_dir_name; + /* Sometimes the path does not contain an explicit directory */ + correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name; /* Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. EIO on the other hand is very important. Hence MY_IGNORE_BADFD. */ - if ((dir_fd= my_open(dir_name, O_RDONLY, MYF(my_flags))) >= 0) + if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0) { if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) res= 2; diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 5926bba9406..76942e3d5e8 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -60,6 +60,8 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, ulong *rec_per_key_part; my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; MARIA_CREATE_INFO tmp_create_info; + my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */ + myf sync_dir= MY_SYNC_DIR; DBUG_ENTER("maria_create"); DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", keys, columns, uniques, flags)); @@ -560,7 +562,11 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, /* max_data_file_length and max_key_file_length are recalculated on open */ if (options & HA_OPTION_TMP_TABLE) + { + tmp_table= TRUE; + sync_dir= 0; share.base.max_data_file_length=(my_off_t) ci->data_file_length; + } share.base.min_block_length= (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH && @@ -576,7 +582,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, { char *iext= strrchr(ci->index_file_name, '.'); int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT); - if (options & HA_OPTION_TMP_TABLE) + if (tmp_table) { char *path; /* chop off the table name, tempory tables use generated name */ @@ -597,8 +603,11 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, /* Don't create the table if the link or file exists to ensure that one doesn't accidently destroy another table. + Don't sync dir now if the data file has the same path. */ - create_flag=0; + create_flag= + (ci->data_file_name && + !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir; } else { @@ -607,8 +616,11 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | MY_APPEND_EXT); linkname_ptr=0; - /* Replace the current file */ - create_flag=MY_DELETE_OLD; + /* + Replace the current file. + Don't sync dir now if the data file has the same path. + */ + create_flag= MY_DELETE_OLD | (!ci->data_file_name ? 0 : sync_dir); } /* @@ -627,7 +639,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, - MYF(MY_WME | create_flag))) < 0) + MYF(MY_WME|create_flag))) < 0) goto err; errpos=1; @@ -653,7 +665,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, char *dext= strrchr(ci->data_file_name, '.'); int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); - if (options & HA_OPTION_TMP_TABLE) + if (tmp_table) { char *path; /* chop off the table name, tempory tables use generated name */ @@ -682,7 +694,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } if ((dfile= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, - MYF(MY_WME | create_flag))) < 0) + MYF(MY_WME | create_flag | sync_dir))) < 0) goto err; } errpos=3; @@ -802,12 +814,18 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) goto err; + if (!tmp_table && my_sync(file, MYF(0))) + goto err; + if (! (flags & HA_DONT_TOUCH_DATA)) { #ifdef USE_RELOC if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) goto err; + if (!tmp_table && my_sync(dfile, MYF(0))) + goto err; #endif + /* if !USE_RELOC, there was no write to the file, no need to sync it */ errpos=2; if (my_close(dfile,MYF(0))) goto err; @@ -816,6 +834,19 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, pthread_mutex_unlock(&THR_LOCK_maria); if (my_close(file,MYF(0))) goto err; + /* + RECOVERYTODO + Write a log record describing the CREATE operation (just the file + names, link names, and the full header's content). + For this record to be of any use for Recovery, we need the upper + MySQL layer to be crash-safe, which it is not now (that would require work + using the ddl_log of sql/sql_table.cc); when is is, we should reconsider + the moment of writing this log record (before or after op, under + THR_LOCK_maria or not...), how to use it in Recovery, and force the log. + For now this record is just informative. + If operation failed earlier, we clean up in "err:" and the MySQL layer + will clean up the frm, so we needn't write anything to the log. + */ my_free((char*) rec_per_key_part,MYF(0)); DBUG_RETURN(0); @@ -831,14 +862,14 @@ err: if (! (flags & HA_DONT_TOUCH_DATA)) my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT), - MYF(0)); + MYF(sync_dir)); /* fall through */ case 1: VOID(my_close(file,MYF(0))); if (! (flags & HA_DONT_TOUCH_DATA)) my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT), - MYF(0)); + MYF(sync_dir)); } my_free((char*) rec_per_key_part, MYF(0)); DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index b16d82ed9f7..fccd29b15f1 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -30,6 +30,7 @@ int maria_delete_all_rows(MARIA_HA *info) { DBUG_RETURN(my_errno=EACCES); } + /* LOCKTODO take X-lock on table here */ if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); if (_ma_mark_file_changed(info)) @@ -53,9 +54,23 @@ int maria_delete_all_rows(MARIA_HA *info) since it was locked then there may be key blocks in the key cache */ flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED); + /* + RECOVERYTODO Log the two chsize and header modifications and force the + log. So that if crash between the two chsize, we finish the work at + Recovery. For this scenario: + "TRUNCATE TABLE t1; DROP TABLE t1; RENAME TABLE t2 to t1; crash;" + Recovery mustn't truncate the new t1, so the log records of TRUNCATE + should be applied only if t1 exists and its ZeroDirtyPagesLSN is smaller + than the records'. See more comments below. + */ if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) || my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) ) goto err; + /* + RECOVERYTODO Consider updating ZeroDirtyPagesLSN here. It is + not a necessity (it is one only in RENAME commands) but an optional + optimization which will allow some REDO skipping at Recovery. + */ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); #ifdef HAVE_MMAP /* Resize mmaped area */ @@ -63,14 +78,25 @@ int maria_delete_all_rows(MARIA_HA *info) _ma_remap_file(info, (my_off_t)0); rw_unlock(&info->s->mmap_lock); #endif + /* + RECOVERYTODO Until we have the TRUNCATE log record and take it into + account for log-low-water-mark calculation and use it in Recovery, we need + to sync. + */ + if (_ma_sync_table_files(info)) + goto err; allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(0); err: { int save_errno=my_errno; + /* RECOVERYTODO log the header modifications */ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + /* RECOVERYTODO until we log above we have to sync */ + if (_ma_sync_table_files(info) && !save_errno) + save_errno= my_errno; allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); } diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index dd781a93fc4..5c7b4337b20 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -31,6 +31,7 @@ int maria_delete_table(const char *name) #ifdef EXTRA_DEBUG _ma_check_table_is_closed(name,"delete"); #endif + /* LOCKTODO take X-lock on table here */ #ifdef USE_RAID { MARIA_HA *info; @@ -59,12 +60,22 @@ int maria_delete_table(const char *name) #endif /* USE_RAID */ fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); - if (my_delete_with_symlink(from, MYF(MY_WME))) + /* + RECOVERYTODO log the two deletes below. + Then do the file deletions. + For this log record to be of any use for Recovery, we need the upper MySQL + layer to be crash-safe in DDLs; when it is we should reconsider the moment + of writing this log record, how to use it in Recovery, and force the log. + For now this record is only informative. + */ + if (my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR))) DBUG_RETURN(my_errno); fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); #ifdef USE_RAID if (raid_type) - DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME)) ? my_errno : 0); + DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME | MY_SYNC_DIR)) ? + my_errno : 0); #endif - DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME)) ? my_errno : 0); + DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR)) ? + my_errno : 0); } diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index 57e540242b9..1f649a00753 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -316,9 +316,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg if (share->not_flushed) { share->not_flushed=0; - if (my_sync(share->kfile, MYF(0))) - error= my_errno; - if (my_sync(info->dfile, MYF(0))) + if (_ma_sync_table_files(info)) error= my_errno; if (error) { @@ -439,3 +437,10 @@ int maria_reset(MARIA_HA *info) HA_STATE_PREV_FOUND); DBUG_RETURN(error); } + + +int _ma_sync_table_files(const MARIA_HA *info) +{ + return (my_sync(info->dfile, MYF(0)) || + my_sync(info->s->kfile, MYF(0))); +} diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 5689d57f2a5..848fb7e9682 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -103,9 +103,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) share->changed=0; if (maria_flush) { - if (my_sync(share->kfile, MYF(0))) - error= my_errno; - if (my_sync(info->dfile, MYF(0))) + if (_ma_sync_table_files(info)) error= my_errno; } else diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 5f65cd2b213..5d89cc063d7 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -23,6 +23,7 @@ int maria_rename(const char *old_name, const char *new_name) { char from[FN_REFLEN],to[FN_REFLEN]; + int data_file_rename_error; #ifdef USE_RAID uint raid_type=0,raid_chunks=0; #endif @@ -32,6 +33,7 @@ int maria_rename(const char *old_name, const char *new_name) _ma_check_table_is_closed(old_name,"rename old_table"); _ma_check_table_is_closed(new_name,"rename new table2"); #endif + /* LOCKTODO take X-lock on table here */ #ifdef USE_RAID { MARIA_HA *info; @@ -48,14 +50,40 @@ int maria_rename(const char *old_name, const char *new_name) fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); - if (my_rename_with_symlink(from, to, MYF(MY_WME))) + /* + RECOVERYTODO log the two renames below. Update + ZeroDirtyPagesLSN of the table on disk (=> sync the files), this is + needed so that Recovery does not pick a wrong table. + Then do the file renames. + For this log record to be of any use for Recovery, we need the upper MySQL + layer to be crash-safe in DDLs; when it is we should reconsider the moment + of writing this log record, how to use it in Recovery, and force the log. + For now this record is only informative. But ZeroDirtyPagesLSN is + critically needed! + */ + if (my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR))) DBUG_RETURN(my_errno); fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); #ifdef USE_RAID if (raid_type) - DBUG_RETURN(my_raid_rename(from, to, raid_chunks, MYF(MY_WME)) ? my_errno : - 0); + data_file_rename_error= my_raid_rename(from, to, raid_chunks, + MYF(MY_WME | MY_SYNC_DIR)); + else #endif - DBUG_RETURN(my_rename_with_symlink(from, to,MYF(MY_WME)) ? my_errno : 0); + data_file_rename_error= + my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR)); + if (data_file_rename_error) + { + /* + now we have a renamed index file and a non-renamed data file, try to + undo the rename of the index file. + */ + data_file_rename_error= my_errno; + fn_format(from, old_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + fn_format(to, new_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + my_rename_with_symlink(to, from, MYF(MY_WME | MY_SYNC_DIR)); + } + DBUG_RETURN(data_file_rename_error); + } diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c index 96b896b03c6..2f205c33b12 100644 --- a/storage/maria/ma_test3.c +++ b/storage/maria/ma_test3.c @@ -65,6 +65,10 @@ int main(int argc,char **argv) MY_INIT(argv[0]); get_options(argc,argv); + fprintf(stderr, "WARNING! this program is to test 'external locking'" + " (when several processes share a table through file locking)" + " which is not supported by Maria at all; expect errors." + " We may soon remove this program.\n"); maria_init(); bzero((char*) keyinfo,sizeof(keyinfo)); bzero((char*) recinfo,sizeof(recinfo)); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 506bdbc71ca..62c50187888 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -741,3 +741,4 @@ int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file); int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, ulong); +int _ma_sync_table_files(const MARIA_HA *info); From 7a5d0c2e6efbffd95998b723e52a22de51c21dae Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 27 Nov 2006 22:09:06 +0100 Subject: [PATCH 68/95] Maria - Fixes to help some pushbuild hosts compile. include/Makefile.am: put my_bit.h in the tarball (need it for mf_keycache.c) mysys/lf_alloc-pin.c: // in .c file mysys/lf_hash.c: // in .c file --- include/Makefile.am | 2 +- mysys/lf_alloc-pin.c | 2 +- mysys/lf_hash.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/Makefile.am b/include/Makefile.am index d5e75d31593..dd7c25e91f4 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -23,7 +23,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \ my_getopt.h sslopt-longopts.h my_dir.h typelib.h \ sslopt-vars.h sslopt-case.h sql_common.h keycache.h \ mysql_time.h mysql/plugin.h $(BUILT_SOURCES) -noinst_HEADERS = config-win.h config-netware.h lf.h \ +noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \ heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \ myisam.h myisampack.h myisammrg.h ft_global.h\ mysys_err.h my_base.h help_start.h help_end.h \ diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 43055766c3e..d0fa29ddaaf 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,4 +1,4 @@ -// TODO multi-pinbox +#warning TODO multi-pinbox /* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index ff0eb8326d5..7e61ef690c6 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -85,7 +85,8 @@ retry: { if (!cursor->curr) return 0; - do { // XXX or goto retry ? + do { +#warning XXX or goto retry ? link= cursor->curr->link; cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); From 2ee0d52a323441e72d623ae0faee18661ea3dae1 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Nov 2006 11:00:55 +0100 Subject: [PATCH 69/95] Maria - fixes for gcc -ansi (// changed to #warning) storage/maria/lockman.c: fix for -ansi (// changed to #warning) storage/maria/trnman.c: fix for -ansi (// changed to #warning) From 3345ed80a569bbd96fb9e0f3d9a0d5e1c0e1b3b7 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Nov 2006 12:14:36 +0100 Subject: [PATCH 70/95] Maria - pagecache unit tests: add test_file.h to the tarball unittest/mysys/Makefile.am: add test_file.h in the tarball --- unittest/mysys/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index 9b230272329..3ef1c3af587 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -17,8 +17,8 @@ noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \ # tests for mysys/mf_pagecache.c -mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c -mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c test_file.h +mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c test_file.h mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) From 9fcc34b4ae1bdcf8aeb4dad0295b7d3534491a17 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Nov 2006 15:12:28 +0100 Subject: [PATCH 71/95] Maria - misc fixes: don't run tests depending on the generic lock manager which will be removed; don't run page cache unit tests by default (too intensive). storage/maria/unittest/Makefile.am: don't run tests depending on the generic lock manager (it will be removed in the end), one of them crashes. unittest/mysys/Makefile.am: page cache tests put a too high load, causes problems on shared machines; so we still build them but give them a suffix so that they are not run by the default "test-unit" Makefile target. --- storage/maria/unittest/Makefile.am | 4 +- unittest/mysys/Makefile.am | 60 ++++++++++++++++-------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index d0b247d65e1..969529db9c9 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -25,5 +25,7 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t lockman2-t +noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t +# the generic lock manager may not be used in the end and lockman1-t crashes, +# so we don't build lockman-t and lockman1-t CLEANFILES = maria_control diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index 3ef1c3af587..8dd0416dbac 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -7,13 +7,17 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a +# tests with the "big" suffix are too big to run for every push, +# cause load problems on shared machines. So we build them, +# but don't run them by default (for this, a non "-t" suffix is used). +# You can run them by hand (how: see "test" target in upper dir). noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \ - mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ - mf_pagecache_single_64k-t \ - mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \ - mf_pagecache_consist_1kHC-t mf_pagecache_consist_64kHC-t \ - mf_pagecache_consist_1kRD-t mf_pagecache_consist_64kRD-t \ - mf_pagecache_consist_1kWR-t mf_pagecache_consist_64kWR-t + mf_pagecache_single_1k-t-big mf_pagecache_single_8k-t-big \ + mf_pagecache_single_64k-t-big \ + mf_pagecache_consist_1k-t-big mf_pagecache_consist_64k-t-big \ + mf_pagecache_consist_1kHC-t-big mf_pagecache_consist_64kHC-t-big \ + mf_pagecache_consist_1kRD-t-big mf_pagecache_consist_64kRD-t-big \ + mf_pagecache_consist_1kWR-t-big mf_pagecache_consist_64kWR-t-big # tests for mysys/mf_pagecache.c @@ -21,31 +25,31 @@ mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c test_file.h mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN -mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 -mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 +mf_pagecache_single_1k_t_big_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_8k_t_big_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_64k_t_big_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_single_8k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 +mf_pagecache_single_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 +mf_pagecache_consist_1k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_consist_64k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY -mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_1kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_64kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY -mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS -mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS +mf_pagecache_consist_1kRD_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +mf_pagecache_consist_64kRD_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS -mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS -mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS +mf_pagecache_consist_1kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +mf_pagecache_consist_64kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS CLEANFILES = my_pagecache_debug.log page_cache_test_file_1 From c86be6303bbc9d85c564876c1f9507ab634b442b Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Nov 2006 15:43:15 +0100 Subject: [PATCH 72/95] Maria - fixes for gcc -ansi (no //) storage/maria/lockman.c: no // storage/maria/trnman.c: no // --- storage/maria/lockman.c | 11 ++++++----- storage/maria/trnman.c | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 7a6b97b3d51..54ea95c6b61 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -1,6 +1,6 @@ -// TODO - allocate everything from dynarrays !!! (benchmark) -// TODO instant duration locks -// automatically place S instead of LS if possible +#warning TODO - allocate everything from dynarrays !!! (benchmark) +#warning TODO instant duration locks +#warning automatically place S instead of LS if possible /* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify @@ -217,7 +217,8 @@ typedef struct lockman_lock { uint64 resource; struct lockman_lock *lonext; intptr volatile link; - uint32 hashnr; // TODO - remove hashnr from LOCK + uint32 hashnr; +#warning TODO - remove hashnr from LOCK uint16 loid; uchar lock; /* sizeof(uchar) <= sizeof(enum) */ uchar flags; @@ -429,7 +430,7 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, cursor.upgrade_from->flags|= IGNORE_ME; #warning is this OK ? if a reader has already read upgrade_from, \ it may find it conflicting with node :( -//#error another bug - see the last test from test_lockman_simple() +#warning another bug - see the last test from test_lockman_simple() } } while (res == REPEAT_ONCE_MORE); diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 1df4c67b4aa..4399f0e1208 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -338,7 +338,8 @@ void trnman_end_trn(TRN *trn, my_bool commit) those lists, and thus nobody may want to free them. Now we don't need a mutex to access free_me list */ - while (free_me) // XXX send them to the purge thread + while (free_me) +#warning XXX send them to the purge thread { TRN *t= free_me; free_me= free_me->next; From 8993dab52dbacff5cc220b80e1679cd99dc2235c Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Nov 2006 21:12:17 +0100 Subject: [PATCH 73/95] Maria - fixes for gcc -ansi storage/maria/unittest/lockman-t.c: no // storage/maria/unittest/lockman1-t.c: no // storage/maria/unittest/lockman2-t.c: no // --- storage/maria/unittest/lockman-t.c | 2 +- storage/maria/unittest/lockman1-t.c | 2 +- storage/maria/unittest/lockman2-t.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c index df8be054ba3..8c0f71175e7 100644 --- a/storage/maria/unittest/lockman-t.c +++ b/storage/maria/unittest/lockman-t.c @@ -18,7 +18,7 @@ lockman for row and table locks */ -//#define EXTRA_VERBOSE +/* #define EXTRA_VERBOSE */ #include diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c index cf4791067dc..41a1f0fd2f4 100644 --- a/storage/maria/unittest/lockman1-t.c +++ b/storage/maria/unittest/lockman1-t.c @@ -18,7 +18,7 @@ lockman for row locks, tablockman for table locks */ -//#define EXTRA_VERBOSE +/* #define EXTRA_VERBOSE */ #include diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c index 18c3072b241..2a8090ab9ac 100644 --- a/storage/maria/unittest/lockman2-t.c +++ b/storage/maria/unittest/lockman2-t.c @@ -18,7 +18,7 @@ tablockman for row and table locks */ -//#define EXTRA_VERBOSE +/* #define EXTRA_VERBOSE */ #include From b72903fe54cf83c3dcbec7e3532e23bf946249ba Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 3 Dec 2006 17:06:27 +0100 Subject: [PATCH 74/95] Maria: * merging changes done to the key cache since May 2006 into Maria * enabling two small enough page cache's unit tests by default * fix to have non-buffered output in unit tests (to not have a false timeout killing in pushbuild) (patch given by Serg) * removing some warnings of gcc -ansi include/lf.h: getting rid of "warning: ISO C does not allow extra `;' outside of a function" (gcc -ansi) mysys/lf_hash.c: getting rid of "warning: ISO C does not allow extra `;' outside of a function" (gcc -ansi) mysys/mf_pagecache.c: Cosmetic changes to minimize the diff with the key cache. #define PAGECACHE_DEBUG_LOG is not needed (just define PAGECACHE_DEBUG if you want) (this change removes "warning: 'pagecache_debug_print' declared `static' but never defined"). Importing changes made to mf_keycache.c since May 2006, into the page cache. Disabling online resizing in the page cache. Fix for "warning: ISO C90 forbids mixed declarations and code". unittest/mysys/Makefile.am: Of the page cache's unit tests, two are small enough to run on pushbuild, renaming them to a -t suffix. unittest/mytap/tap.c: pushbuild kills a test after seeing no output from it for 10 minutes; so we set the mytap framework to not buffer output (patch given by Serg) so that output is seen more frequently and not "all at the end of the test". --- include/lf.h | 14 ++++---- mysys/lf_hash.c | 2 +- mysys/mf_pagecache.c | 66 +++++++++++++++++++++----------------- unittest/mysys/Makefile.am | 15 +++++---- unittest/mytap/tap.c | 2 ++ 5 files changed, 55 insertions(+), 44 deletions(-) diff --git a/include/lf.h b/include/lf.h index c7198ab75b6..ba29b6e9ca5 100644 --- a/include/lf.h +++ b/include/lf.h @@ -80,14 +80,14 @@ void lf_dynarray_destroy(LF_DYNARRAY *array); nolock_wrap(lf_dynarray_value, void *, (LF_DYNARRAY *array, uint idx), - (array, idx)); + (array, idx)) lock_wrap(lf_dynarray_lvalue, void *, (LF_DYNARRAY *array, uint idx), (array, idx), - &array->lock); + &array->lock) nolock_wrap(lf_dynarray_iterate, int, (LF_DYNARRAY *array, lf_dynarray_func func, void *arg), - (array, func, arg)); + (array, func, arg)) /* pin manager for memory allocator, lf_alloc-pin.c @@ -165,15 +165,15 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox); lock_wrap(lf_pinbox_get_pins, LF_PINS *, (LF_PINBOX *pinbox), (pinbox), - &pinbox->pinstack.lock); + &pinbox->pinstack.lock) lock_wrap_void(lf_pinbox_put_pins, (LF_PINS *pins), (pins), - &pins->pinbox->pinstack.lock); + &pins->pinbox->pinstack.lock) lock_wrap_void(lf_pinbox_free, (LF_PINS *pins, void *addr), (pins, addr), - &pins->pinbox->pinstack.lock); + &pins->pinbox->pinstack.lock) /* memory allocator, lf_alloc-pin.c @@ -208,7 +208,7 @@ uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); lock_wrap(lf_alloc_new, void *, (LF_PINS *pins), (pins), - &pins->pinbox->pinstack.lock); + &pins->pinbox->pinstack.lock) /* extendible hash, lf_hash.c diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 7e61ef690c6..e13c5f64f54 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,7 +28,7 @@ #include #include -LF_REQUIRE_PINS(3); +LF_REQUIRE_PINS(3) /* An element of the list */ typedef struct { diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4b92f68d9bf..fe2d827ab50 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -43,9 +43,9 @@ #include #include "my_static.h" #include +#include #include #include -#include /* @@ -86,8 +86,6 @@ #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" */ -#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" - /* In key cache we have external raw locking here we use SERIALIZED_READ_FROM_CACHE to avoid problem of reading @@ -115,14 +113,6 @@ /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; -#if defined(MSDOS) && !defined(M_IC80386) -/* we nead much memory */ -#undef my_malloc_lock -#undef my_free_lock -#define my_malloc_lock(A,B) halloc((long) (A/IO_SIZE),IO_SIZE) -#define my_free_lock(A,B) hfree(A) -#endif /* defined(MSDOS) && !defined(M_IC80386) */ - #define STRUCT_PTR(TYPE, MEMBER, a) \ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) @@ -314,8 +304,8 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; - DBUG_ENTER("info_check_pin"); PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + DBUG_ENTER("info_check_pin"); if (info) { if (mode == PAGECACHE_PIN_LEFT_UNPINNED) @@ -372,10 +362,10 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; - DBUG_ENTER("info_check_lock"); PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, thread); + DBUG_ENTER("info_check_lock"); switch(lock) { case PAGECACHE_LOCK_LEFT_UNLOCKED: @@ -605,15 +595,9 @@ uint pagecache_fwrite(PAGECACHE *pagecache, next_power(value) is 2 at the power of (1+floor(log2(value))); e.g. next_power(2)=4, next_power(3)=4. */ -static uint next_power(uint value) +static inline uint next_power(uint value) { - uint old_value= 1; - while (value) - { - old_value= value; - value&= value-1; - } - return (old_value << 1); + return (uint) my_round_up_to_next_power((uint32) value) << 1; } @@ -834,15 +818,24 @@ err: The function starts the operation only when all other threads performing operations with the key cache let her to proceed (when cnt_for_resize=0). -*/ + Before being usable, this function needs: + - to receive fixes for BUG#17332 "changing key_buffer_size on a running + server can crash under load" similar to those done to the key cache + - to have us (Sanja) look at the additional constraints placed on + resizing, due to the page locking specific to this page cache. + So we disable it for now. +*/ +#if 0 /* keep disabled until code is fixed see above !! */ int resize_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold) { int blocks; +#ifdef THREAD struct st_my_thread_var *thread; PAGECACHE_WQUEUE *wqueue; +#endif DBUG_ENTER("resize_pagecache"); if (!pagecache->inited) @@ -909,6 +902,7 @@ finish: pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(blocks); } +#endif /* 0 */ /* @@ -1504,8 +1498,12 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { DBUG_ENTER("remove_reader"); BLOCK_INFO(block); +#ifdef THREAD if (! --block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); +#else + --block->hash_link->requests; +#endif DBUG_VOID_RETURN; } @@ -1515,7 +1513,8 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) signals on its termination */ -static inline void wait_for_readers(PAGECACHE *pagecache, +static inline void wait_for_readers(PAGECACHE *pagecache + __attribute__((unused)), PAGECACHE_BLOCK_LINK *block) { #ifdef THREAD @@ -1684,7 +1683,6 @@ static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, { reg1 PAGECACHE_HASH_LINK *hash_link; PAGECACHE_HASH_LINK **start; - PAGECACHE_PAGE page; KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", (uint) file->file, (ulong) pageno)); @@ -1710,6 +1708,7 @@ restart: #ifdef THREAD /* Wait for a free hash link */ struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PAGE page; KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); page.file= *file; page.pageno= pageno; @@ -2053,8 +2052,10 @@ restart: /* Remove the hash link for this page from the hash table */ unlink_hash(pagecache, block->hash_link); /* All pending requests for this page must be resubmitted */ +#ifdef THREAD if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); @@ -2209,10 +2210,10 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, BLOCK_INFO(block); while (block->status & BLOCK_WRLOCK) { - DBUG_PRINT("info", ("fail to lock, waiting...")); /* Lock failed we will wait */ #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; + DBUG_PRINT("info", ("fail to lock, waiting...")); add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); dec_counter_for_resize_op(pagecache); do @@ -2403,8 +2404,10 @@ static void read_block(PAGECACHE *pagecache, KEYCACHE_DBUG_PRINT("read_block", ("primary request: new page in cache")); /* Signal that all pending requests for this page now can be processed */ +#ifdef THREAD if (block->wqueue[COND_FOR_REQUESTED].last_thread) release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif } else { @@ -3210,9 +3213,11 @@ restart: block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); KEYCACHE_DBUG_PRINT("key_cache_insert", ("primary request: new page in cache")); +#ifdef THREAD /* Signal that all pending requests for this now can be processed. */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif } } else @@ -3223,11 +3228,9 @@ restart: if (block->status & BLOCK_CHANGED) link_to_file_list(pagecache, block, &block->hash_link->file, 1); } - else - { - if (! (block->status & BLOCK_CHANGED)) + else if (! (block->status & BLOCK_CHANGED)) link_to_changed_list(pagecache, block); - } + if (! (block->status & BLOCK_ERROR)) { bmove512(block->buffer, buff, pagecache->block_size); @@ -3342,9 +3345,11 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* Keep track of the number of currently unused blocks. */ pagecache->blocks_unused++; +#ifdef THREAD /* All pending requests for this page must be resubmitted. */ if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif } @@ -3438,12 +3443,14 @@ static int flush_cached_blocks(PAGECACHE *pagecache, if (!last_errno) last_errno= errno ? errno : -1; } +#ifdef THREAD /* Let to proceed for possible waiting requests to write to the block page. It might happen only during an operation to resize the key cache. */ if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif /* type will never be FLUSH_IGNORE_CHANGED here */ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) { @@ -3970,7 +3977,6 @@ static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) { KEYCACHE_THREAD_TRACE_END(""); pthread_mutex_unlock(mutex); - return; } diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index 8dd0416dbac..f7b72255a72 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -12,7 +12,7 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ # but don't run them by default (for this, a non "-t" suffix is used). # You can run them by hand (how: see "test" target in upper dir). noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \ - mf_pagecache_single_1k-t-big mf_pagecache_single_8k-t-big \ + mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ mf_pagecache_single_64k-t-big \ mf_pagecache_consist_1k-t-big mf_pagecache_consist_64k-t-big \ mf_pagecache_consist_1kHC-t-big mf_pagecache_consist_64kHC-t-big \ @@ -25,11 +25,14 @@ mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c test_file.h mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN -mf_pagecache_single_1k_t_big_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_8k_t_big_SOURCES = $(mf_pagecache_single_src) +# Those two take reasonable resources (<100 MB) for < 1 minute +mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 + +# All others below are big mf_pagecache_single_64k_t_big_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_single_8k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 mf_pagecache_single_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 mf_pagecache_consist_1k_t_big_SOURCES = $(mf_pagecache_consist_src) @@ -52,4 +55,4 @@ mf_pagecache_consist_1kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPA mf_pagecache_consist_64kWR_t_big_SOURCES = $(mf_pagecache_consist_src) mf_pagecache_consist_64kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS -CLEANFILES = my_pagecache_debug.log page_cache_test_file_1 +CLEANFILES = pagecache_debug.log page_cache_test_file_1 diff --git a/unittest/mytap/tap.c b/unittest/mytap/tap.c index 17ec51863d9..51330ad820b 100644 --- a/unittest/mytap/tap.c +++ b/unittest/mytap/tap.c @@ -150,6 +150,8 @@ static signal_entry install_signal[]= { void plan(int const count) { + + setvbuf(tapout, 0, _IONBF, 0); /* provide output at once */ /* Install signal handler */ From 5750daa4cf73eba0a6f1d9909844bf106228ae80 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 4 Dec 2006 15:31:04 +0100 Subject: [PATCH 75/95] Maria - post-review change of "fixes for gcc -ansi". And reducing the time taken by my_atomic-t. include/lf.h: fix to be able to add a ";" at the end of the macro's invokation (removing it here, removes a warning from "gcc -ansi" about a standalone ";"). mysys/lf_hash.c: ";" is ok now after LF_REQUIRE_PINS mysys/mf_pagecache.c: comment fix unittest/mysys/my_atomic-t.c: decreasing number of iterations to make test take less time. --- include/lf.h | 2 +- mysys/lf_hash.c | 2 +- mysys/mf_pagecache.c | 2 +- unittest/mysys/my_atomic-t.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/lf.h b/include/lf.h index ba29b6e9ca5..c161bd5044d 100644 --- a/include/lf.h +++ b/include/lf.h @@ -136,7 +136,7 @@ typedef struct { #if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) #define LF_REQUIRE_PINS(N) \ static const char require_pins[LF_PINBOX_PINS-N]; \ - static const int LF_NUM_PINS_IN_THIS_FILE= N; + static const int LF_NUM_PINS_IN_THIS_FILE= N #define _lf_pin(PINS, PIN, ADDR) \ ( \ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \ diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index e13c5f64f54..7e61ef690c6 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,7 +28,7 @@ #include #include -LF_REQUIRE_PINS(3) +LF_REQUIRE_PINS(3); /* An element of the list */ typedef struct { diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index fe2d827ab50..3e3484d5efb 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -970,7 +970,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, /* - Flushes and removes page cache from memory + Removes page cache from memory. Does NOT flush pages to disk. SYNOPSIS end_pagecache() diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 314335944d3..02e07acd67b 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -275,9 +275,9 @@ int main() &my_charset_bin); #ifdef MY_ATOMIC_MODE_RWLOCKS -#define CYCLES 10000 +#define CYCLES 3000 #else -#define CYCLES 1000000 +#define CYCLES 300000 #endif #define THREADS 100 From ad29d5520b1ba379a75adc447f301851ff4588a4 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Dec 2006 15:23:50 +0100 Subject: [PATCH 76/95] Maria - fix for "statement with no effect" warning mysys/lf_hash.c: fix for "statement with no effect" warning storage/maria/lockman.c: fix for "statement with no effect" warning --- mysys/lf_hash.c | 4 ++-- storage/maria/lockman.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 7e61ef690c6..b2ad7a93ace 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -96,7 +96,7 @@ retry: cur_keylen= cursor->curr->keylen; if (*cursor->prev != (intptr)cursor->curr) { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } if (!DELETED(link)) @@ -118,7 +118,7 @@ retry: _lf_alloc_free(pins, cursor->curr); else { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } } diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 54ea95c6b61..7672fadb068 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -287,7 +287,7 @@ retry: cur_flags= cursor->curr->flags; if (*cursor->prev != (intptr)cursor->curr) { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } if (!DELETED(link)) @@ -364,7 +364,7 @@ retry: _lf_alloc_free(pins, cursor->curr); else { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } } From fa05e9c9f426a19f016897ec57c047c277bf52c7 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 16 Dec 2006 18:10:47 +0100 Subject: [PATCH 77/95] WL#3071 - Maria checkpoint Adding rec_lsn to Maria's page cache. Misc fixes to Checkpoint. mysys/mf_pagecache.c: adding rec_lsn, the LSN when a page first became dirty. It is set when unlocking a page (TODO: should also be set when the unlocking is an implicit part of pagecache_write()). It is reset in link_to_file_list() and free_block() (one of which is used every time we flush a block). It is a ulonglong and not LSN, because its destination is comparisons for which ulonglong is better than a struct. storage/maria/ma_checkpoint.c: misc fixes to Checkpoint (updates now that the transaction manager and the page cache are more known) storage/maria/ma_close.c: an important note for the future. storage/maria/ma_least_recently_dirtied.c: comment --- mysys/mf_pagecache.c | 12 +- storage/maria/ma_checkpoint.c | 433 +++++++++++++--------- storage/maria/ma_close.c | 6 + storage/maria/ma_least_recently_dirtied.c | 5 +- 4 files changed, 277 insertions(+), 179 deletions(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3e3484d5efb..807a3ea520a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -295,6 +295,7 @@ struct st_pagecache_block_link enum pagecache_page_type type; /* type of the block */ uint hits_left; /* number of hits left until promotion */ ulonglong last_hit_time; /* timestamp of the last hit */ + ulonglong rec_lsn; /* LSN when first became dirty */ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ }; @@ -1202,6 +1203,7 @@ static void link_to_file_list(PAGECACHE *pagecache, if (block->status & BLOCK_CHANGED) { block->status&= ~BLOCK_CHANGED; + block->rec_lsn= 0; pagecache->blocks_changed--; pagecache->global_blocks_changed--; } @@ -2509,6 +2511,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ + DBUG_ASSERT(first_REDO_LSN_for_page > 0); + set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } #ifndef DBUG_OFF @@ -2671,6 +2675,8 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ + DBUG_ASSERT(first_REDO_LSN_for_page > 0); + set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } #ifndef DBUG_OFF @@ -3012,10 +3018,9 @@ restart: pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* - free_block() will change the status of the block so no need to change - it here. + free_block() will change the status and rec_lsn of the block so no + need to change them here. */ - } /* Cache is locked, so we can relese page before freeing it */ pagecache_make_lock_and_pin(pagecache, block, @@ -3328,6 +3333,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; #endif + block->rec_lsn= 0; KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", ("block is freed")); diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 83312ce37b8..717b6202559 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -40,8 +40,7 @@ #include "share.h" #include "log.h" -/* could also be called LSN_ERROR */ -#define LSN_IMPOSSIBLE ((LSN)0) +#define LSN_IMPOSSIBLE ((LSN)0) /* could also be called LSN_ERROR */ #define LSN_MAX ((LSN)ULONGLONG_MAX) /* @@ -57,9 +56,12 @@ st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,.. MEDIUM checkpoint. */ LSN max_rec_lsn_at_last_checkpoint= 0; +/* last submitted checkpoint request; cleared only when executed */ CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE; CHECKPOINT_LEVEL synchronous_checkpoint_in_progress= NONE; +static inline ulonglong read_non_atomic(ulonglong volatile *x); + /* Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA ENGINE DO CHECKPOINT"), and probably by maria_panic(), and at the end of the @@ -67,6 +69,7 @@ CHECKPOINT_LEVEL synchronous_checkpoint_in_progress= NONE; */ my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) { + my_bool result; DBUG_ENTER("execute_synchronous_checkpoint"); DBUG_ASSERT(level > NONE); @@ -76,43 +79,52 @@ my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) wait_on_checkpoint_done_cond(); synchronous_checkpoint_in_progress= level; - execute_checkpoint(level); + result= execute_checkpoint(level); safemutex_assert_owner(log_mutex); synchronous_checkpoint_in_progress= NONE; unlock(log_mutex); broadcast(checkpoint_done_cond); + DBUG_RETURN(result); } -/* Picks a checkpoint request, if there is one, and executes it */ +/* + If no checkpoint is running, and there is a pending asynchronous checkpoint + request, executes it. + Is safe if multiple threads call it, though in first version only one will. + It's intended to be used by a thread which regularly calls this function; + this is why, if there is a request,it does not wait in a loop for + synchronous checkpoints to be finished, but just exits (because the thread + may want to do something useful meanwhile (flushing dirty pages for example) + instead of waiting). +*/ my_bool execute_asynchronous_checkpoint_if_any() { + my_bool result; CHECKPOINT_LEVEL level; DBUG_ENTER("execute_asynchronous_checkpoint"); lock(log_mutex); - if (likely(next_asynchronous_checkpoint_to_do == NONE)) + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (synchronous_checkpoint_in_progress != NONE))) { unlock(log_mutex); DBUG_RETURN(FALSE); } - while (synchronous_checkpoint_in_progress) - wait_on_checkpoint_done_cond(); - -do_checkpoint: level= next_asynchronous_checkpoint_to_do; DBUG_ASSERT(level > NONE); - execute_checkpoint(level); + result= execute_checkpoint(level); safemutex_assert_owner(log_mutex); - if (next_asynchronous_checkpoint_to_do > level) - goto do_checkpoint; /* one more request was posted */ - else + /* If only one thread calls this function, "<" can never happen below */ + if (next_asynchronous_checkpoint_to_do <= level) { - DBUG_ASSERT(next_asynchronous_checkpoint_to_do == level); - next_asynchronous_checkpoint_to_do= NONE; /* all work done */ + /* it's our request or weaker/equal ones, all work is done */ + next_asynchronous_checkpoint_to_do= NONE; } + /* otherwise if it is a stronger request, we'll deal with it at next call */ unlock(log_mutex); broadcast(checkpoint_done_cond); + DBUG_RETURN(result); } @@ -123,17 +135,14 @@ do_checkpoint: */ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) { - LSN candidate_max_rec_lsn_at_last_checkpoint; - /* to avoid { lock + no-op + unlock } in the common (==indirect) case */ - my_bool need_log_mutex; - DBUG_ENTER("execute_checkpoint"); safemutex_assert_owner(log_mutex); - copy_of_max_rec_lsn_at_last_checkpoint= max_rec_lsn_at_last_checkpoint; - if (unlikely(need_log_mutex= (level > INDIRECT))) + if (unlikely(level > INDIRECT)) { + LSN copy_of_max_rec_lsn_at_last_checkpoint= + max_rec_lsn_at_last_checkpoint; /* much I/O work to do, release log mutex */ unlock(log_mutex); @@ -149,51 +158,29 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) flush all pages which were already dirty at last checkpoint: ensures that recovery will never start from before the next-to-last checkpoint (two-checkpoint rule). - It is max, not min as the WL says (TODO update WL). */ flush_all_LRD_to_lsn(copy_of_max_rec_lsn_at_last_checkpoint); /* this will go full speed (normal scheduling, no sleep) */ break; } + lock(log_mutex); } - candidate_max_rec_lsn_at_last_checkpoint= checkpoint_indirect(need_log_mutex); - - lock(log_mutex); /* - this portion cannot be done as a hook in write_log_record() for the - LOGREC_CHECKPOINT type because: - - at that moment we still have not written to the control file so cannot - mark the request as done; this could be solved by writing to the control - file in the hook but that would be an I/O under the log's mutex, bad. - - it would not be nice organisation of code (I tried it :). + keep mutex locked upon exit because callers will want to clear + mutex-protected status variables */ - if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE) - { - /* checkpoint succeeded */ - maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; - written_since_last_checkpoint= (my_off_t)0; - DBUG_RETURN(FALSE); - } - /* - keep mutex locked because callers will want to clear mutex-protected - status variables - */ - DBUG_RETURN(TRUE); + DBUG_RETURN(execute_checkpoint_indirect()); } /* Does an indirect checpoint (collects data from data structures, writes into a checkpoint log record). - Returns the largest LSN of the LRD when the checkpoint happened (this is a - fuzzy definition), or LSN_IMPOSSIBLE on error. That LSN is used for the - "two-checkpoint rule" (MEDIUM checkpoints). + Starts and ends while having log's mutex (released in the middle). */ -LSN checkpoint_indirect(my_bool need_log_mutex) +my_bool execute_checkpoint_indirect() { - DBUG_ENTER("checkpoint_indirect"); - int error= 0; /* checkpoint record data: */ LSN checkpoint_start_lsn; @@ -202,163 +189,198 @@ LSN checkpoint_indirect(my_bool need_log_mutex) char *ptr; LSN checkpoint_lsn; LSN candidate_max_rec_lsn_at_last_checkpoint= 0; - list_element *el; /* to scan lists */ - ulong stored_LRD_size= 0; - + DBUG_ENTER("execute_checkpoint_indirect"); DBUG_ASSERT(sizeof(byte *) <= 8); DBUG_ASSERT(sizeof(LSN) <= 8); - if (need_log_mutex) - lock(log_mutex); /* maybe this will clash with log_read_end_lsn() */ + safemutex_assert_owner(log_mutex); checkpoint_start_lsn= log_read_end_lsn(); + if (LSN_IMPOSSIBLE == checkpoint_start_lsn) /* error */ + DBUG_RETURN(TRUE); unlock(log_mutex); DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); /* STEP 1: fetch information about dirty pages */ - - /* - We lock the entire cache but will be quick, just reading/writing a few MBs - of memory at most. - */ - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - - /* - This is an over-estimation, as in theory blocks_changed may contain - non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the - checkpoint record; the true number of page-LRD-info we'll store into the - record is stored_LRD_size. - */ - string1.length= 8+8+(8+8)*pagecache->blocks_changed; - if (NULL == (string1.str= my_malloc(string1.length))) - goto err; - ptr= string1.str; - int8store(ptr, checkpoint_start_lsn); - ptr+= 8+8; /* don't store stored_LRD_size now, wait */ - if (pagecache->blocks_changed > 0) + /* note: this piece will move into mysys/mf_pagecache.c */ { + ulong stored_LRD_size= 0; /* - There are different ways to scan the dirty blocks; - flush_all_key_blocks() uses a loop over pagecache->used_last->next_used, - and for each element of the loop, loops into - pagecache->changed_blocks[FILE_HASH(file of the element)]. - This has the drawback that used_last includes non-dirty blocks, and it's - two loops over many elements. Here we try something simpler. - If there are no blocks in changed_blocks[file_hash], we should hit - zeroes and skip them. + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + When we enter here, we must be sure that no "first_in_switch" situation + is happening or will happen (either we have to get rid of + first_in_switch in the code or, first_in_switch has to increment a + "danger" counter for Checkpoint to know it has to wait. */ - uint file_hash; - for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* + This is an over-estimation, as in theory blocks_changed may contain + non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the + checkpoint record; the true number of page-LRD-info we'll store into the + record is stored_LRD_size. + */ + /* + TODO: Ingo says blocks_changed is not a reliable number (see his + document); ask him. + */ + string1.length= 8+8+(8+8+8)*pagecache->blocks_changed; + if (NULL == (string1.str= my_malloc(string1.length))) + goto err; + ptr= string1.str; + int8store(ptr, checkpoint_start_lsn); + ptr+= 8+8; /* don't store stored_LRD_size now, wait */ + if (pagecache->blocks_changed > 0) { - PAGECACHE_BLOCK_LINK *block; - for (block= pagecache->changed_blocks[file_hash] ; - block; - block= block->next_changed) + uint file_hash; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { - DBUG_ASSERT(block->hash_link != NULL); - DBUG_ASSERT(block->status & BLOCK_CHANGED); - if (block->type != PAGECACHE_LSN_PAGE) + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) { - /* no need to store it in the checkpoint record */ - continue; + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + { + continue; /* no need to store it in the checkpoint record */ + } + /* + In the current pagecache, rec_lsn is not set correctly: + 1) it is set on pagecache_unlock(), too late (a page is dirty + (BLOCK_CHANGED) since the first pagecache_write()). It may however + be not too late, because until unlock(), the page's update is not + committed, so it's ok that REDOs for it be skipped at Recovery + (which is what happens with an unset rec_lsn). Note that this + relies on the assumption that a transaction never commits while + holding locks on pages. + 2) sometimes the unlocking can be an implicit action of + pagecache_write(), without any call to pagecache_unlock(), then + rec_lsn is not set. That one is a critical problem. + TODO: fix this when Monty has explained how he writes BLOB pages. + */ + if (0 == block->rec_lsn) + abort(); /* always fail in all builds, in case it's problem 2) */ + + int8store(ptr, block->hash_link->file.file); + ptr+= 8; + int8store(ptr, block->hash_link->pageno); + ptr+= 8; + int8store(ptr, block->rec_lsn); + ptr+= 8; + stored_LRD_size++; + DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed); + set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint, + block->rec_lsn); } - /* Q: two "block"s cannot have the same "hash_link", right? */ - int8store(ptr, block->hash_link->pageno); - ptr+= 8; - /* I assume rec_lsn will be member of "block", not of "hash_link" */ - int8store(ptr, block->rec_lsn); - ptr+= 8; - stored_LRD_size++; - set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint, - block->rec_lsn); } - } - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed); - int8store(string1.str+8, stored_LRD_size); - string1.length= 8+8+(8+8)*stored_LRD_size; + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + int8store(string1.str+8, stored_LRD_size); + string1.length= 8+8+(8+8+8)*stored_LRD_size; + } /* STEP 2: fetch information about transactions */ - + /* note: this piece will move into trnman.c */ /* - If trx are in more than one list (e.g. three: - running transactions, committed transactions, purge queue), we can either - take mutexes of all three together or do crabbing. - But if an element can move from list 1 to list 3 without passing through - list 2, crabbing is dangerous. - Hopefully it's ok to take 3 mutexes together... - Otherwise I'll have to make sure I miss no important trx and I handle dups. + Transactions are in the "active list" (protected by a mutex) and in a + lock-free hash of "committed" (insertion protected by the same mutex, + deletion lock-free). */ - lock(global_transactions_list_mutex); /* or 3 mutexes if there are 3 */ - string2.length= 8+(8+8)*trx_list->count; - if (NULL == (string2.str= my_malloc(string2.length))) - goto err; - ptr= string2.str; - int8store(ptr, trx_list->count); - ptr+= 8; - for (el= trx_list->first; el; el= el->next) { - /* possibly latch el.rwlock */ - *ptr= el->state; - ptr++; - int7store(ptr, el->long_trans_id); - ptr+= 7; - int2store(ptr, el->short_trans_id); - ptr+= 2; - int8store(ptr, el->undo_lsn); - ptr+= 8; - int8store(ptr, el->undo_purge_lsn); + TRN *trn; + ulong stored_trn_size= 0; + /* First, the active transactions */ + pthread_mutex_lock(LOCK_trn_list); + string2.length= 8+(7+2+8+8+8)*trnman_active_transactions; + if (NULL == (string2.str= my_malloc(string2.length))) + goto err; + ptr= string2.str; ptr+= 8; + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + /* we would latch trn.rwlock if it existed */ + if (0 == trn->short_trid) /* trn is not inited, skip */ + continue; + /* state is not needed for now (only when we have prepared trx) */ + /* int7store does not exist but mi_int7store does */ + int7store(ptr, trn->trid); + ptr+= 7; + int2store(ptr, trn->short_trid); + ptr+= 2; + int8store(ptr, trn->undo_lsn); /* is an LSN 7 or 8 bytes really? */ + ptr+= 8; + int8store(ptr, trn->undo_purge_lsn); + ptr+= 8; + int8store(ptr, read_non_atomic(&trn->first_undo_lsn)); + ptr+= 8; + /* possibly unlatch el.rwlock */ + stored_trn_size++; + } + pthread_mutex_unlock(LOCK_trn_list); /* - if no latch, use double variable of type ULONGLONG_CONSISTENT in - st_transaction, or even no need if Intel >=486 + Now the committed ones. + We need a function which scans the hash's list of elements in a + lock-free manner (a bit like lfind(), starting from bucket 0), and for + each node (committed transaction) stores the transaction's + information (trid, undo_purge_lsn, first_undo_lsn) into a buffer. + This big buffer is malloc'ed at the start, so the number of elements (or + an upper bound of it) found in the hash needs to be known in advance + (one solution is to keep LOCK_trn_list locked, ensuring that nodes are + only deleted). */ - int8store(ptr, el->first_undo_lsn); - ptr+= 8; - /* possibly unlatch el.rwlock */ + /* + TODO: if we see there exists no transaction (active and committed) we can + tell the lock-free structures to do some freeing (my_free()). + */ + int8store(string1.str, stored_trn_size); + string2.length= 8+(7+2+8+8+8)*stored_trn_size; } - unlock(global_transactions_list_mutex); /* STEP 3: fetch information about table files */ - /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ - lock(global_share_list_mutex); - string3.length= 8+(8+8)*share_list->count; - if (NULL == (string3.str= my_malloc(string3.length))) - goto err; - ptr= string3.str; - /* possibly latch each MARIA_SHARE, one by one, like this: */ - pthread_mutex_lock(&share->intern_lock); - /* - We'll copy the file id (a bit like share->kfile), the file name - (like share->unique_file_name[_length]). - */ - make_copy_of_global_share_list_to_array; - pthread_mutex_unlock(&share->intern_lock); - unlock(global_share_list_mutex); - - /* work on copy */ - int8store(ptr, elements_in_array); - ptr+= 8; - for (el in array) { - int8store(ptr, array[...].file_id); - ptr+= 8; - memcpy(ptr, array[...].file_name, ...); - ptr+= ...; + /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ + lock(global_share_list_mutex); + string3.length= 8+(8+8)*share_list->count; + if (NULL == (string3.str= my_malloc(string3.length))) + goto err; + ptr= string3.str; + /* possibly latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); /* - these two are long ops (involving disk I/O) that's why we copied the - list, to not keep the list locked for long: + We'll copy the file id (a bit like share->kfile), the file name + (like share->unique_file_name[_length]). */ - flush_bitmap_pages(el); - /* TODO: and also autoinc counter, logical file end, free page list */ + make_copy_of_global_share_list_to_array; + pthread_mutex_unlock(&share->intern_lock); + unlock(global_share_list_mutex); - /* - fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per - second, so if you have touched 1000 files it's 7 seconds). - */ - force_file(el); + /* work on copy */ + int8store(ptr, elements_in_array); + ptr+= 8; + for (el in array) + { + int8store(ptr, array[...].file_id); + ptr+= 8; + memcpy(ptr, array[...].file_name, ...); + ptr+= ...; + /* + these two are long ops (involving disk I/O) that's why we copied the + list, to not keep the list locked for long: + */ + /* TODO: what if the table pointer is gone/reused now? */ + flush_bitmap_pages(el); + /* TODO: and also autoinc counter, logical file end, free page list */ + + /* + fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per + second, so if you have touched 1000 files it's 7 seconds). + */ + force_file(el); + } } /* LAST STEP: now write the checkpoint log record */ @@ -389,11 +411,38 @@ err: candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE; end: + my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(candidate_max_rec_lsn_at_last_checkpoint); + /* + this portion cannot be done as a hook in write_log_record() for the + LOGREC_CHECKPOINT type because: + - at that moment we still have not written to the control file so cannot + mark the request as done; this could be solved by writing to the control + file in the hook but that would be an I/O under the log's mutex, bad. + - it would not be nice organisation of code (I tried it :). + */ + if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE) + { + /* checkpoint succeeded */ + /* + TODO: compute log's low water mark (how to do that with our fuzzy + ARIES-like reads of data structures? TODO think about it :). + */ + lock(log_mutex); + /* That LSN is used for the "two-checkpoint rule" (MEDIUM checkpoints) */ + maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; + written_since_last_checkpoint= (my_off_t)0; + DBUG_RETURN(FALSE); + } + lock(log_mutex); + DBUG_RETURN(TRUE); + /* + keep mutex locked upon exit because callers will want to clear + mutex-protected status variables + */ } @@ -433,7 +482,7 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); safemutex_assert_owner(log_mutex); DBUG_ASSERT(level > NONE); - if (checkpoint_request < level) + if (next_asynchronous_checkpoint_to_do < level) { /* no equal or stronger running or to run, we post request */ /* @@ -445,7 +494,7 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); (see least_recently_dirtied.c) will notice our request in max a few seconds. */ - checkpoint_request= level; /* post request */ + next_asynchronous_checkpoint_to_do= level; /* post request */ } /* @@ -457,3 +506,37 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); the end user. */ } + + +/* + If a 64-bit variable transitions from both halves being zero to both halves + being non-zero, and never changes after that (like the transaction's + first_undo_lsn), this function can be used to do a read of it (without + mutex, without atomic load) which always produces a correct (though maybe + slightly old) value (even on 32-bit CPUs). +*/ +static inline ulonglong read_non_atomic(ulonglong volatile *x) +{ +#if ( SIZEOF_CHARP >= 8 ) + /* 64-bit CPU (right?), 64-bit reads are atomic */ + return *x; +#else + /* + 32-bit CPU, 64-bit reads may give a mixed of old half and new half (old + low bits and new high bits, or the contrary). + As the variable we read transitions from both halves being zero to both + halves being non-zero, and never changes then, we can detect atomicity + problems: + */ + ulonglong y; + for (;;) /* loop until no atomicity problems */ + { + y= *x; + if (likely(((0 == y) || + ((0 != (y >> 32)) && (0 != (y << 32))))) + return y; + /* Worth seeing it! */ + DBUG_PRINT("info",("atomicity problem")); + } +#endif +} diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 5b940eaf4c3..73764cf444a 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -57,6 +57,12 @@ int maria_close(register MARIA_HA *info) info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); } flag= !--share->reopen; + /* + RECOVERYTODO: + Below we are going to make the table unknown to future checkpoints, so it + needs to have fsync'ed itself entirely (bitmap, pages, etc) at this + point. + */ maria_open_list=list_delete(maria_open_list,&info->open_list); pthread_mutex_unlock(&share->intern_lock); diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c index b0b7fb1ef10..809442b4e97 100644 --- a/storage/maria/ma_least_recently_dirtied.c +++ b/storage/maria/ma_least_recently_dirtied.c @@ -94,7 +94,10 @@ pthread_handler_decl background_flush_and_checkpoint_thread() while (this_thread_not_killed) { if ((flush_calls++) & ((2< Date: Sat, 16 Dec 2006 18:10:48 +0100 Subject: [PATCH 78/95] WL#3071 - Maria checkpoint. Correcting comment about a bad problem. storage/maria/ma_checkpoint.c: I was too optimistic; problem 1) is really a bad problem. --- storage/maria/ma_checkpoint.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 717b6202559..608a6fb9fcd 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -212,7 +212,7 @@ my_bool execute_checkpoint_indirect() When we enter here, we must be sure that no "first_in_switch" situation is happening or will happen (either we have to get rid of first_in_switch in the code or, first_in_switch has to increment a - "danger" counter for Checkpoint to know it has to wait. + "danger" counter for Checkpoint to know it has to wait. TODO. */ pagecache_pthread_mutex_lock(&pagecache->cache_lock); @@ -251,19 +251,25 @@ my_bool execute_checkpoint_indirect() /* In the current pagecache, rec_lsn is not set correctly: 1) it is set on pagecache_unlock(), too late (a page is dirty - (BLOCK_CHANGED) since the first pagecache_write()). It may however - be not too late, because until unlock(), the page's update is not - committed, so it's ok that REDOs for it be skipped at Recovery - (which is what happens with an unset rec_lsn). Note that this - relies on the assumption that a transaction never commits while - holding locks on pages. + (BLOCK_CHANGED) since the first pagecache_write()). So in this + scenario: + thread1: thread2: + write_REDO + pagecache_write() + checkpoint : reclsn not known + pagecache_unlock(sets rec_lsn) + commit + crash, + at recovery we will wrongly skip the REDO. It also affects the + low-water mark's computation. 2) sometimes the unlocking can be an implicit action of pagecache_write(), without any call to pagecache_unlock(), then - rec_lsn is not set. That one is a critical problem. + rec_lsn is not set. + 1) and 2) are critical problems. TODO: fix this when Monty has explained how he writes BLOB pages. */ if (0 == block->rec_lsn) - abort(); /* always fail in all builds, in case it's problem 2) */ + abort(); /* always fail in all builds */ int8store(ptr, block->hash_link->file.file); ptr+= 8; From 7199c905590391f64802913369aab7d288eff4c8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 18 Dec 2006 17:24:02 +0100 Subject: [PATCH 79/95] WL#3071 Maria checkpoint - cleanups, simplifications - moving the construction of the "dirty pages table" into the pagecache where it belongs (because it's the pagecache which knows dirty pages). TODO: do the same soon for the "transactions table". - fix for a small bug in the pagecache (decrementation of "changed_blocks") include/pagecache.h: prototype mysys/mf_pagecache.c: m_string.h moves up for LEX_STRING to be known for pagecache.h. In pagecache_delete_page(), we must decrement "blocks_changed" even if we just delete the page without flushing it. A new function pagecache_collect_changed_blocks_with_LSN() (used by the Checkpoint module), which stores information about the changed blocks (a.k.a. "the dirty pages table") into a LEX_STRING. This function is not tested now, it will be when there is a Checkpoint. storage/maria/ma_checkpoint.c: refining the checkpoint code: factoring functions, moving the construction of the "dirty pages table" into mf_pagecache.c (I'll do the same with the construction of the "transactions table" once Serg tells me what's the best way to do it). storage/maria/ma_least_recently_dirtied.c: Simplifying the thread which does background flushing of least-recently-dirtied pages: - in first version that thread will not flush, just do checkpoints - in 2nd version, flushing should re-use existing page cache functions like flush_pagecache_blocks(). unittest/mysys/test_file.h: m_string.h moves up for LEX_STRING to be known in pagecache.h --- include/pagecache.h | 3 + mysys/mf_pagecache.c | 180 ++++++++++++++++++--- storage/maria/ma_checkpoint.c | 179 +++++---------------- storage/maria/ma_least_recently_dirtied.c | 186 ++++------------------ unittest/mysys/test_file.h | 2 +- 5 files changed, 233 insertions(+), 317 deletions(-) diff --git a/include/pagecache.h b/include/pagecache.h index 4d64070ad62..9f215325ae5 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -221,6 +221,9 @@ extern my_bool pagecache_delete_page(PAGECACHE *pagecache, enum pagecache_page_lock lock, my_bool flush); extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); +extern my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *max_lsn); C_MODE_END #endif /* _keycache_h */ diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 807a3ea520a..96c855fda0a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -40,9 +40,9 @@ */ #include "mysys_priv.h" +#include #include #include "my_static.h" -#include #include #include #include @@ -295,7 +295,7 @@ struct st_pagecache_block_link enum pagecache_page_type type; /* type of the block */ uint hits_left; /* number of hits left until promotion */ ulonglong last_hit_time; /* timestamp of the last hit */ - ulonglong rec_lsn; /* LSN when first became dirty */ + LSN rec_lsn; /* LSN when first became dirty */ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ }; @@ -2988,33 +2988,35 @@ restart: goto restart; } - if (block->status & BLOCK_CHANGED && flush) + if (block->status & BLOCK_CHANGED) { - /* The block contains a dirty page - push it out of the cache */ - - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 1); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - - if (error) + if (flush) { - block->status|= BLOCK_ERROR; - goto err; + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) + { + block->status|= BLOCK_ERROR; + goto err; + } } - pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* @@ -3793,6 +3795,132 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) } +/* + Allocates a buffer and stores in it some information about all dirty pages + of type PAGECACHE_LSN_PAGE. + + SYNOPSIS + pagecache_collect_changed_blocks_with_LSN() + pagecache pointer to the page cache + str (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + max_lsn (OUT) pointer to a LSN where the maximum rec_lsn of all + relevant dirty pages will be put + + DESCRIPTION + Does the allocation because the caller cannot know the size itself. + Memory freeing is done by the caller. + Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they + are not interesting for a checkpoint record. + The caller has the intention of doing checkpoints. + + RETURN + 0 on success + 1 on error +*/ +my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *max_lsn) +{ + my_bool error; + ulong stored_LRD_size= 0; + uint file_hash; + char *ptr; + DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); + + *max_lsn= 0; + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + When we enter here, we must be sure that no "first_in_switch" situation + is happening or will happen (either we have to get rid of + first_in_switch in the code or, first_in_switch has to increment a + "danger" counter for this function to know it has to wait). TODO. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* Count how many dirty pages are interesting */ + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + /* + Q: is there somthing subtle with block->hash_link: can it be NULL? + does it have to be == hash_link->block... ? + */ + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it */ + /* + In the current pagecache, rec_lsn is not set correctly: + 1) it is set on pagecache_unlock(), too late (a page is dirty + (BLOCK_CHANGED) since the first pagecache_write()). So in this + scenario: + thread1: thread2: + write_REDO + pagecache_write() checkpoint : reclsn not known + pagecache_unlock(sets rec_lsn) + commit + crash, + at recovery we will wrongly skip the REDO. It also affects the + low-water mark's computation. + 2) sometimes the unlocking can be an implicit action of + pagecache_write(), without any call to pagecache_unlock(), then + rec_lsn is not set. + 1) and 2) are critical problems. + TODO: fix this when Monty has explained how he writes BLOB pages. + */ + if (0 == block->rec_lsn) + { + DBUG_ASSERT(0); + goto err; + } + stored_LRD_size++; + } + } + + str->length= 8+(4+4+8)*stored_LRD_size; + if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) + goto err; + ptr= str->str; + int8store(ptr, stored_LRD_size); + ptr+= 8; + if (0 == stored_LRD_size) + goto end; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it in the checkpoint record */ + DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) && + (4 == sizeof(block->hash_link->pageno))); + int4store(ptr, block->hash_link->file.file); + ptr+= 4; + int4store(ptr, block->hash_link->pageno); + ptr+= 4; + int8store(ptr, (ulonglong)block->rec_lsn); + ptr+= 8; + set_if_bigger(*max_lsn, block->rec_lsn); + } + } + error= 0; + goto end; +err: + error= 1; +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(error); +} + + #ifndef DBUG_OFF /* Test if disk-cache is ok diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 608a6fb9fcd..a1d094d7da1 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -56,9 +56,9 @@ st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,.. MEDIUM checkpoint. */ LSN max_rec_lsn_at_last_checkpoint= 0; -/* last submitted checkpoint request; cleared only when executed */ +/* last submitted checkpoint request; cleared when starts */ CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE; -CHECKPOINT_LEVEL synchronous_checkpoint_in_progress= NONE; +CHECKPOINT_LEVEL checkpoint_in_progress= NONE; static inline ulonglong read_non_atomic(ulonglong volatile *x); @@ -74,16 +74,10 @@ my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) DBUG_ASSERT(level > NONE); lock(log_mutex); - while ((synchronous_checkpoint_in_progress != NONE) || - (next_asynchronous_checkpoint_to_do != NONE)) + while (checkpoint_in_progress != NONE) wait_on_checkpoint_done_cond(); - synchronous_checkpoint_in_progress= level; result= execute_checkpoint(level); - safemutex_assert_owner(log_mutex); - synchronous_checkpoint_in_progress= NONE; - unlock(log_mutex); - broadcast(checkpoint_done_cond); DBUG_RETURN(result); } @@ -92,7 +86,7 @@ my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) request, executes it. Is safe if multiple threads call it, though in first version only one will. It's intended to be used by a thread which regularly calls this function; - this is why, if there is a request,it does not wait in a loop for + this is why, if there is a request, it does not wait in a loop for synchronous checkpoints to be finished, but just exits (because the thread may want to do something useful meanwhile (flushing dirty pages for example) instead of waiting). @@ -103,27 +97,20 @@ my_bool execute_asynchronous_checkpoint_if_any() CHECKPOINT_LEVEL level; DBUG_ENTER("execute_asynchronous_checkpoint"); + /* first check without mutex, ok to see old data */ + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (checkpoint_in_progress != NONE))) + DBUG_RETURN(FALSE); + lock(log_mutex); if (likely((next_asynchronous_checkpoint_to_do == NONE) || - (synchronous_checkpoint_in_progress != NONE))) + (checkpoint_in_progress != NONE))) { unlock(log_mutex); DBUG_RETURN(FALSE); } - level= next_asynchronous_checkpoint_to_do; - DBUG_ASSERT(level > NONE); - result= execute_checkpoint(level); - safemutex_assert_owner(log_mutex); - /* If only one thread calls this function, "<" can never happen below */ - if (next_asynchronous_checkpoint_to_do <= level) - { - /* it's our request or weaker/equal ones, all work is done */ - next_asynchronous_checkpoint_to_do= NONE; - } - /* otherwise if it is a stronger request, we'll deal with it at next call */ - unlock(log_mutex); - broadcast(checkpoint_done_cond); + result= execute_checkpoint(next_asynchronous_checkpoint_to_do); DBUG_RETURN(result); } @@ -135,9 +122,13 @@ my_bool execute_asynchronous_checkpoint_if_any() */ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) { + my_bool result; DBUG_ENTER("execute_checkpoint"); safemutex_assert_owner(log_mutex); + if (next_asynchronous_checkpoint_to_do <= level) + next_asynchronous_checkpoint_to_do= NONE; + checkpoint_in_progress= level; if (unlikely(level > INDIRECT)) { @@ -166,11 +157,11 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) lock(log_mutex); } - /* - keep mutex locked upon exit because callers will want to clear - mutex-protected status variables - */ - DBUG_RETURN(execute_checkpoint_indirect()); + result= execute_checkpoint_indirect(); + checkpoint_in_progress= NONE; + unlock(log_mutex); + broadcast(checkpoint_done_cond); + DBUG_RETURN(result); } @@ -181,114 +172,37 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) */ my_bool execute_checkpoint_indirect() { - int error= 0; + int error= 0, i; /* checkpoint record data: */ LSN checkpoint_start_lsn; - LEX_STRING string1={0,0}, string2={0,0}, string3={0,0}; - LEX_STRING *string_array[4]; + char checkpoint_start_lsn_char[8]; + LEX_STRING strings[5]={ {&checkpoint_start_lsn_str, 8}, {0,0}, {0,0}, {0,0}, {0,0} }; char *ptr; LSN checkpoint_lsn; - LSN candidate_max_rec_lsn_at_last_checkpoint= 0; + LSN candidate_max_rec_lsn_at_last_checkpoint; DBUG_ENTER("execute_checkpoint_indirect"); DBUG_ASSERT(sizeof(byte *) <= 8); DBUG_ASSERT(sizeof(LSN) <= 8); safemutex_assert_owner(log_mutex); + + /* STEP 1: record current end-of-log LSN */ checkpoint_start_lsn= log_read_end_lsn(); if (LSN_IMPOSSIBLE == checkpoint_start_lsn) /* error */ DBUG_RETURN(TRUE); unlock(log_mutex); DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); + int8store(strings[0].str, checkpoint_start_lsn); - /* STEP 1: fetch information about dirty pages */ - /* note: this piece will move into mysys/mf_pagecache.c */ - { - ulong stored_LRD_size= 0; - /* - We lock the entire cache but will be quick, just reading/writing a few MBs - of memory at most. - When we enter here, we must be sure that no "first_in_switch" situation - is happening or will happen (either we have to get rid of - first_in_switch in the code or, first_in_switch has to increment a - "danger" counter for Checkpoint to know it has to wait. TODO. - */ - pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* STEP 2: fetch information about dirty pages */ - /* - This is an over-estimation, as in theory blocks_changed may contain - non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the - checkpoint record; the true number of page-LRD-info we'll store into the - record is stored_LRD_size. - */ - /* - TODO: Ingo says blocks_changed is not a reliable number (see his - document); ask him. - */ - string1.length= 8+8+(8+8+8)*pagecache->blocks_changed; - if (NULL == (string1.str= my_malloc(string1.length))) - goto err; - ptr= string1.str; - int8store(ptr, checkpoint_start_lsn); - ptr+= 8+8; /* don't store stored_LRD_size now, wait */ - if (pagecache->blocks_changed > 0) - { - uint file_hash; - for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) - { - PAGECACHE_BLOCK_LINK *block; - for (block= pagecache->changed_blocks[file_hash] ; - block; - block= block->next_changed) - { - DBUG_ASSERT(block->hash_link != NULL); - DBUG_ASSERT(block->status & BLOCK_CHANGED); - if (block->type != PAGECACHE_LSN_PAGE) - { - continue; /* no need to store it in the checkpoint record */ - } - /* - In the current pagecache, rec_lsn is not set correctly: - 1) it is set on pagecache_unlock(), too late (a page is dirty - (BLOCK_CHANGED) since the first pagecache_write()). So in this - scenario: - thread1: thread2: - write_REDO - pagecache_write() - checkpoint : reclsn not known - pagecache_unlock(sets rec_lsn) - commit - crash, - at recovery we will wrongly skip the REDO. It also affects the - low-water mark's computation. - 2) sometimes the unlocking can be an implicit action of - pagecache_write(), without any call to pagecache_unlock(), then - rec_lsn is not set. - 1) and 2) are critical problems. - TODO: fix this when Monty has explained how he writes BLOB pages. - */ - if (0 == block->rec_lsn) - abort(); /* always fail in all builds */ + if (pagecache_collect_changed_blocks_with_LSN(pagecache, &strings[1], + &candidate_max_rec_lsn_at_last_checkpoint)) + goto err; - int8store(ptr, block->hash_link->file.file); - ptr+= 8; - int8store(ptr, block->hash_link->pageno); - ptr+= 8; - int8store(ptr, block->rec_lsn); - ptr+= 8; - stored_LRD_size++; - DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed); - set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint, - block->rec_lsn); - } - } - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - int8store(string1.str+8, stored_LRD_size); - string1.length= 8+8+(8+8+8)*stored_LRD_size; - } - - /* STEP 2: fetch information about transactions */ + /* STEP 3: fetch information about transactions */ /* note: this piece will move into trnman.c */ /* Transactions are in the "active list" (protected by a mutex) and in a @@ -345,7 +259,7 @@ my_bool execute_checkpoint_indirect() string2.length= 8+(7+2+8+8+8)*stored_trn_size; } - /* STEP 3: fetch information about table files */ + /* STEP 4: fetch information about table files */ { /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ @@ -391,13 +305,8 @@ my_bool execute_checkpoint_indirect() /* LAST STEP: now write the checkpoint log record */ - string_array[0]= string1; - string_array[1]= string2; - string_array[2]= string3; - string_array[3]= NULL; - checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, - &system_trans, string_array); + &system_trans, strings); /* Do nothing between the log write and the control file write, for the @@ -418,9 +327,8 @@ err: end: - my_free(buffer1.str, MYF(MY_ALLOW_ZERO_PTR)); - my_free(buffer2.str, MYF(MY_ALLOW_ZERO_PTR)); - my_free(buffer3.str, MYF(MY_ALLOW_ZERO_PTR)); + for (i= 1; i<5; i++) + my_free(strings[i], MYF(MY_ALLOW_ZERO_PTR)); /* this portion cannot be done as a hook in write_log_record() for the @@ -440,7 +348,6 @@ end: lock(log_mutex); /* That LSN is used for the "two-checkpoint rule" (MEDIUM checkpoints) */ maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; - written_since_last_checkpoint= (my_off_t)0; DBUG_RETURN(FALSE); } lock(log_mutex); @@ -471,6 +378,8 @@ log_write_record(...) thread" WL#3261) to do a checkpoint */ request_asynchronous_checkpoint(INDIRECT); + /* prevent similar redundant requests */ + written_since_last_checkpoint= (my_off_t)0; } ...; unlock(log_mutex); @@ -488,16 +397,13 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); safemutex_assert_owner(log_mutex); DBUG_ASSERT(level > NONE); - if (next_asynchronous_checkpoint_to_do < level) + if ((next_asynchronous_checkpoint_to_do < level) && + (checkpoint_in_progress < level)) { /* no equal or stronger running or to run, we post request */ /* - note that thousands of requests for checkpoints are going to come all - at the same time (when the log bound - MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS is passed), so it may not be a - good idea for each of them to broadcast a cond to wake up the background - checkpoint thread. We just don't broacast a cond, the checkpoint thread - (see least_recently_dirtied.c) will notice our request in max a few + We just don't broacast a cond, the checkpoint thread + (see ma_least_recently_dirtied.c) will notice our request in max a few seconds. */ next_asynchronous_checkpoint_to_do= level; /* post request */ @@ -520,6 +426,7 @@ void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); first_undo_lsn), this function can be used to do a read of it (without mutex, without atomic load) which always produces a correct (though maybe slightly old) value (even on 32-bit CPUs). + The prototype will change with Sanja's new LSN type. */ static inline ulonglong read_non_atomic(ulonglong volatile *x) { diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c index 809442b4e97..170e59a601a 100644 --- a/storage/maria/ma_least_recently_dirtied.c +++ b/storage/maria/ma_least_recently_dirtied.c @@ -36,162 +36,57 @@ #include "least_recently_dirtied.h" /* - MikaelR suggested removing this global_LRD_mutex (I have a paper note of - comments), however at least for the first version we'll start with this - mutex (which will be a LOCK-based atomic_rwlock). -*/ -pthread_mutex_t global_LRD_mutex; - -/* - When we flush a page, we should pin page. - This "pin" is to protect against that: - I make copy, - you modify in memory and flush to disk and remove from LRD and from cache, - I write copy to disk, - checkpoint happens. - result: old page is on disk, page is absent from LRD, your REDO will be - wrongly ignored. - - Pin: there can be multiple pins, flushing imposes that there are zero pins. - For example, pin could be a uint counter protected by the page's latch. - - Maybe it's ok if when there is a page replacement, the replacer does not - remove page from the LRD (it would save global mutex); for that, background - flusher should be prepared to see pages in the LRD which are not in the page - cache (then just ignore them). However checkpoint will contain superfluous - entries and so do more work. -*/ - -#define PAGE_SIZE (16*1024) /* just as an example */ -/* - Optimization: - LRD flusher should not flush pages one by one: to be fast, it flushes a - group of pages in sequential disk order if possible; a group of pages is just - FLUSH_GROUP_SIZE pages. - Key cache has groupping already somehow Monty said (investigate that). -*/ -#define FLUSH_GROUP_SIZE 512 /* 8 MB */ -/* - We don't want to probe for checkpoint requests all the time (it takes - the log mutex). - If FLUSH_GROUP_SIZE is 8MB, assuming a local disk which can write 30MB/s - (1.8GB/min), probing every 16th call to flush_one_group_from_LRD() is every - 16*8=128MB which is every 128/30=4.2second. - Using a power of 2 gives a fast modulo operation. -*/ -#define CHECKPOINT_PROBING_PERIOD_LOG2 4 - -/* - This thread does background flush of pieces of the LRD, and all checkpoints. + This thread does background flush of pieces of the LRD, and serves + requests for asynchronous checkpoints. Just launch it when engine starts. MikaelR questioned why the same thread does two different jobs, the risk could be that while a checkpoint happens no LRD flushing happens. + For now, we only do checkpoints - no LRD flushing (to be done when the + second version of the page cache is ready WL#3077). + Reasons to delay: + - Recovery will work (just slower) + - new page cache may be different, why do then re-do + - current pagecache probably has issues with flushing when somebody is + writing to the table being flushed - better avoid that. */ pthread_handler_decl background_flush_and_checkpoint_thread() { - char *flush_group_buffer= my_malloc(PAGE_SIZE*FLUSH_GROUP_SIZE); - uint flush_calls= 0; while (this_thread_not_killed) { - if ((flush_calls++) & ((2<data, PAGE_SIZE); - pin_page; - page_cache_unlatch(page_id, KEEP_PINNED); /* but keep pinned */ - } - for (scan_the_array) - { - /* - As an optimization, we try to identify contiguous-in-the-file segments (to - issue one big write()). - In non-optimized version, contiguous segment is always only one page. - */ - if ((next_page.page_id - this_page.page_id) == 1) - { - /* - this page and next page are in same file and are contiguous in the - file: add page to contiguous segment... - */ - continue; /* defer write() to next pages */ - } - /* contiguous segment ends */ - my_pwrite(file, contiguous_segment_start_offset, contiguous_segment_size); - /* - note that if we had doublewrite, doublewrite buffer may prevent us from - doing this write() grouping (if doublewrite space is shorter). - */ - } /* - Now remove pages from LRD. As we have pinned them, all pages that we - managed to pin are still in the LRD, in the same order, we can just cut - the LRD at the last element of "array". This is more efficient that - removing element by element (which would take LRD mutex many times) in the - loop above. + Build a list of pages to flush: + changed_blocks[i] is roughly sorted by descending rec_lsn, + so we could do a merge sort of changed_blocks[] lists, stopping after we + have the max_this_number first elements or after we have found a page with + rec_lsn > max_this_lsn. + Then do like pagecache_flush_blocks_int() does (beware! this time we are + not alone on the file! there may be dangers! TODO: sort this out). */ - lock(global_LRD_mutex); - /* cut LRD by bending LRD->first, free cut portion... */ - unlock(global_LRD_mutex); - for (scan_array) - { - /* - if the page has a property "modified since last flush" (i.e. which is - redundant with the presence of the page in the LRD, this property can - just be a pointer to the LRD element) we should reset it - (note that then the property would live slightly longer than - the presence in LRD). - */ - page_cache_unpin(page_id); - /* - order between unpin and removal from LRD is not clear, depends on what - pin actually is. - */ - } - free(array); + /* MikaelR noted that he observed that Linux's file cache may never fsync to disk until this cache is full, at which point it decides to empty the @@ -201,28 +96,11 @@ flush_one_group_from_LRD() } /* - Flushes all page from LRD up to approximately rec_lsn>=max_lsn. - This is approximate because we flush groups, and because the LRD list may + Note that when we flush all page from LRD up to rec_lsn>=max_lsn, + this is approximate because the LRD list may not be exactly sorted by rec_lsn (because for a big row, all pages of the row are inserted into the LRD with rec_lsn being the LSN of the REDO for the first page, so if there are concurrent insertions, the last page of the big row may have a smaller rec_lsn than the previous pages inserted by concurrent inserters). */ -int flush_all_LRD_to_lsn(LSN max_lsn) -{ - lock(global_LRD_mutex); - if (max_lsn == MAX_LSN) /* don't want to flush forever, so make it fixed: */ - max_lsn= LRD->first->prev->rec_lsn; - while (LRD->first->rec_lsn < max_lsn) - { - if (flush_one_group_from_LRD()) /* will unlock LRD mutex */ - return 1; - /* - The scheduler may preempt us here as we released the mutex; this is good. - */ - lock(global_LRD_mutex); - } - unlock(global_LRD_mutex); - return 0; -} diff --git a/unittest/mysys/test_file.h b/unittest/mysys/test_file.h index ea787c123ed..bfc660b13d0 100644 --- a/unittest/mysys/test_file.h +++ b/unittest/mysys/test_file.h @@ -1,4 +1,4 @@ - +#include #include /* From c2f2a41ed32522fa89f5c79fec42ab60e0b4aa62 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 19 Dec 2006 19:15:53 +0100 Subject: [PATCH 80/95] Maria - merging recent changes done to MyISAM into Maria. Plus compiler warnings, and a fix to the pagecache unit tests for IA64 include/maria.h: merging MyISAM into Maria include/myisam.h: post-merge fixes mysql-test/r/maria.result: merging MyISAM into Maria mysql-test/t/maria.test: merging MyISAM into Maria sql/mysqld.cc: post-merge fixes storage/maria/ha_maria.cc: merging MyISAM into Maria storage/maria/ha_maria.h: merging MyISAM into Maria storage/maria/ma_check.c: merging MyISAM into Maria storage/maria/ma_open.c: merging MyISAM into Maria storage/maria/ma_packrec.c: merging MyISAM into Maria storage/maria/ma_range.c: merging MyISAM into Maria storage/maria/ma_sort.c: merging MyISAM into Maria storage/maria/maria_def.h: merging MyISAM into Maria storage/maria/maria_pack.c: merging MyISAM into Maria storage/maria/plug.in: merging MyISAM into Maria storage/myisam/myisamdef.h: merging MyISAM into Maria storage/myisam/myisampack.c: fix for compiler warnings unittest/mysys/mf_pagecache_consist.c: this sets the stack size lower than the minimum on IA64, we remove it (it made the test fail) unittest/mysys/mf_pagecache_single.c: this sets the stack size lower than the minimum on IA64, we remove it (it made the test fail) --- include/maria.h | 15 +- include/myisam.h | 15 ++ mysql-test/r/maria.result | 142 +++++++++++- mysql-test/t/maria.test | 92 ++++++++ sql/mysqld.cc | 2 +- storage/maria/ha_maria.cc | 12 +- storage/maria/ha_maria.h | 2 +- storage/maria/ma_check.c | 305 +++++++++++++++++++++----- storage/maria/ma_open.c | 5 +- storage/maria/ma_packrec.c | 76 ++++--- storage/maria/ma_range.c | 15 ++ storage/maria/ma_sort.c | 268 ++++++++++++---------- storage/maria/maria_def.h | 34 +-- storage/maria/maria_pack.c | 6 +- storage/maria/plug.in | 1 + storage/myisam/myisamdef.h | 12 +- storage/myisam/myisampack.c | 8 +- unittest/mysys/mf_pagecache_consist.c | 8 - unittest/mysys/mf_pagecache_single.c | 8 - 19 files changed, 760 insertions(+), 266 deletions(-) diff --git a/include/maria.h b/include/maria.h index 705ba7c91c7..da9cbd49688 100644 --- a/include/maria.h +++ b/include/maria.h @@ -293,6 +293,17 @@ extern uint maria_get_pointer_length(ulonglong file_length, uint def); #define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */ #define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */ +typedef uint maria_bit_type; + +typedef struct st_maria_bit_buff +{ /* Used for packing of record */ + maria_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MARIA_BIT_BUFF; + + typedef struct st_maria_sort_info { #ifdef THREAD @@ -305,7 +316,6 @@ typedef struct st_maria_sort_info char *buff; SORT_KEY_BLOCKS *key_block, *key_block_end; SORT_FT_BUF *ft_buf; - my_off_t filelength, dupp, buff_length; ha_rows max_records; uint current_key, total_keys; @@ -314,12 +324,12 @@ typedef struct st_maria_sort_info enum data_file_type new_data_file_type; } MARIA_SORT_INFO; - typedef struct st_maria_sort_param { pthread_t thr; IO_CACHE read_cache, tempfile, tempfile_for_exceptions; DYNAMIC_ARRAY buffpek; + MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ MARIA_KEYDEF *keyinfo; MARIA_SORT_INFO *sort_info; @@ -342,6 +352,7 @@ typedef struct st_maria_sort_param uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *); int (*key_read)(struct st_maria_sort_param *,void *); diff --git a/include/myisam.h b/include/myisam.h index fc7468a382d..9171fec0b95 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -300,6 +300,17 @@ extern uint mi_get_pointer_length(ulonglong file_length, uint def); #define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */ #define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */ +typedef uint mi_bit_type; + +typedef struct st_mi_bit_buff +{ /* Used for packing of record */ + mi_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MI_BIT_BUFF; + + typedef struct st_sort_info { #ifdef THREAD @@ -319,10 +330,13 @@ typedef struct st_sort_info myf myf_rw; enum data_file_type new_data_file_type; } MI_SORT_INFO; + typedef struct st_mi_sort_param +{ pthread_t thr; IO_CACHE read_cache, tempfile, tempfile_for_exceptions; DYNAMIC_ARRAY buffpek; + MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ MI_KEYDEF *keyinfo; MI_SORT_INFO *sort_info; @@ -345,6 +359,7 @@ typedef struct st_mi_sort_param uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); int (*key_read)(struct st_mi_sort_param *,void *); diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 39c7c0b65dd..7812f04336a 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -780,6 +780,132 @@ a b xxxxxxxxx bbbbbb xxxxxxxxx bbbbbb DROP TABLE t1; +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value +maria_repair_threads 2 +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1 QUICK; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value +maria_repair_threads 1 drop table if exists t1,t2,t3; --- Testing varchar --- --- Testing varchar --- @@ -985,15 +1111,15 @@ i 10 select sql_big_result v,count(c) from t1 group by v limit 10; v count(c) a 1 -a 10 -b 10 -c 10 -d 10 -e 10 -f 10 -g 10 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 h 10 -i 10 +i 10 select c,count(*) from t1 group by c limit 10; c count(*) a 1 diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 4130831c482..87a38ff60b0 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -733,6 +733,98 @@ UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; SELECT * FROM t1; DROP TABLE t1; +# +# Bug#8283 - OPTIMIZE TABLE causes data loss +# +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +# +# Test OPTIMIZE. This creates a new data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +OPTIMIZE TABLE t1; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +# Test REPAIR QUICK. This retains the old data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +REPAIR TABLE t1 QUICK; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; # # Test varchar # diff --git a/sql/mysqld.cc b/sql/mysqld.cc index ade7539251b..188f3ae9a69 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3537,7 +3537,7 @@ int main(int argc, char **argv) { if (global_system_variables.log_warnings) sql_print_warning("Asked for %ld thread stack, but got %ld", - my_thread_stack, (long) stack_size); + my_thread_stack_size, (long) stack_size); #if defined(__ia64__) || defined(__ia64) my_thread_stack_size= stack_size*2; #else diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index d9e42467351..56f33693528 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -283,7 +283,7 @@ err: bool ha_maria::check_if_locking_is_allowed(uint sql_command, ulong type, TABLE *table, uint count, - bool called_by_logger_thread) + bool called_by_privileged_thread) { /* To be able to open and lock for reading system tables like 'mysql.proc', @@ -300,10 +300,10 @@ bool ha_maria::check_if_locking_is_allowed(uint sql_command, /* Deny locking of the log tables, which is incompatible with - concurrent insert. Unless called from a logger THD: - general_log_thd or slow_log_thd. + concurrent insert. Unless called from a logger THD (general_log_thd + or slow_log_thd) or by a privileged thread. */ - if (!called_by_logger_thread) + if (!called_by_privileged_thread) return check_if_log_table_locking_is_allowed(sql_command, type, table); return TRUE; @@ -1368,7 +1368,7 @@ void ha_maria::position(const byte * record) } -void ha_maria::info(uint flag) +int ha_maria::info(uint flag) { MARIA_INFO info; char name_buff[FN_REFLEN]; @@ -1428,6 +1428,8 @@ void ha_maria::info(uint flag) /* Faster to always update, than to do it based on flag */ stats.update_time= info.update_time; stats.auto_increment_value= info.auto_increment; + + return 0; } diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index e4edcc80982..52f289a7428 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -103,7 +103,7 @@ public: int rnd_pos(byte * buf, byte * pos); int restart_rnd_next(byte * buf, byte * pos); void position(const byte * record); - void info(uint); + int info(uint); int extra(enum ha_extra_function operation); int extra_opt(enum ha_extra_function operation, ulong cache_size); int reset(void); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index f997b90a61c..2a800ae41ad 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -16,6 +16,31 @@ /* Describe, check and repair of MARIA tables */ +/* + About checksum calculation. + + There are two types of checksums. Table checksum and row checksum. + + Row checksum is an additional byte at the end of dynamic length + records. It must be calculated if the table is configured for them. + Otherwise they must not be used. The variable + MYISAM_SHARE::calc_checksum determines if row checksums are used. + MI_INFO::checksum is used as temporary storage during row handling. + For parallel repair we must assure that only one thread can use this + variable. There is no problem on the write side as this is done by one + thread only. But when checking a record after read this could go + wrong. But since all threads read through a common read buffer, it is + sufficient if only one thread checks it. + + Table checksum is an eight byte value in the header of the index file. + It can be calculated even if row checksums are not used. The variable + MI_CHECK::glob_crc is calculated over all records. + MI_SORT_PARAM::calc_checksum determines if this should be done. This + variable is not part of MI_CHECK because it must be set per thread for + parallel repair. The global glob_crc must be changed by one thread + only. And it is sufficient to calculate the checksum once only. +*/ + #include "ma_ftdefs.h" #include #include @@ -42,8 +67,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, ha_checksum *key_checksum, uint level); static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); -static int writekeys(HA_CHECK *param, MARIA_HA *info,byte *buff, - my_off_t filepos); +static int writekeys(MARIA_SORT_PARAM *sort_param); static int sort_one_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MARIA_SORT_PARAM *sort_param,void *key); @@ -1159,7 +1183,8 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) goto err; start_recpos=pos; splits++; - VOID(_ma_pack_get_block_info(info,&block_info, -1, start_recpos)); + VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, start_recpos)); pos=block_info.filepos+block_info.rec_len; if (block_info.rec_len < (uint) info->s->min_pack_length || block_info.rec_len > (uint) info->s->max_pack_length) @@ -1173,7 +1198,8 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, block_info.filepos, block_info.rec_len, READING_NEXT)) goto err; - if (_ma_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) + if (_ma_pack_rec_unpack(info, &info->bit_buff, record, + info->rec_buff, block_info.rec_len)) { _ma_check_print_error(param,"Found wrong record at %s", llstr(start_recpos,llbuff)); @@ -1457,7 +1483,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, info->state->empty=0; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); @@ -1486,7 +1512,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, /* Re-create all keys, which are set in key_map. */ while (!(error=sort_get_next_record(&sort_param))) { - if (writekeys(param,info,(byte*)sort_param.record,sort_param.filepos)) + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) goto err; @@ -1631,11 +1657,13 @@ err: /* Uppate keyfile when doing repair */ -static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, - my_off_t filepos) +static int writekeys(MARIA_SORT_PARAM *sort_param) { register uint i; - uchar *key; + uchar *key; + MARIA_HA *info= sort_param->sort_info->info; + byte *buff= sort_param->record; + my_off_t filepos= sort_param->filepos; DBUG_ENTER("writekeys"); key=info->lastkey+info->s->base.max_key_length; @@ -1689,8 +1717,8 @@ static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, } } /* Remove checksum that was added to glob_crc in sort_get_next_record */ - if (param->calc_checksum) - param->glob_crc-= info->checksum; + if (sort_param->calc_checksum) + sort_param->sort_info->param->glob_crc-= info->checksum; DBUG_PRINT("error",("errno: %d",my_errno)); DBUG_RETURN(-1); } /* writekeys */ @@ -2180,7 +2208,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, del=info->state->del; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; rec_per_key_part= param->rec_per_key_part; for (sort_param.key=0 ; sort_param.key < share->base.keys ; @@ -2266,7 +2294,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, param->retry_repair=1; goto err; } - param->calc_checksum=0; /* No need to calc glob_crc */ + /* No need to calculate checksum again. */ + sort_param.calc_checksum= 0; free_root(&sort_param.wordroot, MYF(0)); /* Set for next loop */ @@ -2430,6 +2459,28 @@ err: Each key is handled by a separate thread. TODO: make a number of threads a parameter + In parallel repair we use one thread per index. There are two modes: + + Quick + + Only the indexes are rebuilt. All threads share a read buffer. + Every thread that needs fresh data in the buffer enters the shared + cache lock. The last thread joining the lock reads the buffer from + the data file and wakes all other threads. + + Non-quick + + The data file is rebuilt and all indexes are rebuilt to point to + the new record positions. One thread is the master thread. It + reads from the old data file and writes to the new data file. It + also creates one of the indexes. The other threads read from a + buffer which is filled by the master. If they need fresh data, + they enter the shared cache lock. If the masters write buffer is + full, it flushes it to the new data file and enters the shared + cache lock too. When all threads joined in the lock, the master + copies its write buffer to the read buffer for the other threads + and wakes them. + RESULT 0 ok <>0 Error @@ -2452,6 +2503,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, ulong *rec_per_key_part; HA_KEYSEG *keyseg; char llbuff[22]; + IO_CACHE new_data_cache; /* For non-quick repair. */ IO_CACHE_SHARE io_share; MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; @@ -2473,19 +2525,55 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; + /* + Quick repair (not touching data file, rebuilding indexes): + { + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + } + + Non-quick repair (rebuilding data file and indexes): + { + Master thread: + + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + Write cache is (MI_INFO *info)->rec_cache using new_file. + + Slave threads: + + Read cache is new_data_cache synced to master rec_cache. + + The final assignment of the filedescriptor for rec_cache is done + after the cache creation. + + Don't check file size on new_data_cache, as the resulting file size + is not known yet. + + As rec_cache and new_data_cache are synced, write_buffer_length is + used for the read cache 'new_data_cache'. Both start at the same + position 'new_header_length'. + } + */ + DBUG_PRINT("info", ("is quick repair: %d", rep_quick)); bzero((char*)&sort_info,sizeof(sort_info)); + /* Initialize pthread structures before goto err. */ + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + if (!(sort_info.key_block= - alloc_key_blocks(param, - (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(¶m->read_cache,info->dfile, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || - (! rep_quick && - init_io_cache(&info->rec_cache,info->dfile, - (uint) param->write_buffer_length, - WRITE_CACHE,new_header_length,1, - MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + alloc_key_blocks(param, (uint) param->sort_key_blocks, + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) || + (!rep_quick && + (init_io_cache(&info->rec_cache, info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) || + init_io_cache(&new_data_cache, -1, + (uint) param->write_buffer_length, + READ_CACHE, new_header_length, 1, + MYF(MY_WME | MY_DONT_CHECK_FILESIZE))))) goto err; sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; @@ -2576,8 +2664,6 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, del=info->state->del; param->glob_crc=0; - if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; if (!(sort_param=(MARIA_SORT_PARAM *) my_malloc((uint) share->base.keys * @@ -2627,6 +2713,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, sort_param[i].sort_info=&sort_info; sort_param[i].master=0; sort_param[i].fix_datafile=0; + sort_param[i].calc_checksum= 0; sort_param[i].filepos=new_header_length; sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; @@ -2664,19 +2751,45 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, sort_info.total_keys=i; sort_param[0].master= 1; sort_param[0].fix_datafile= (my_bool)(! rep_quick); + sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM); sort_info.got_error=0; - pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init(&sort_info.cond, 0); pthread_mutex_lock(&sort_info.mutex); - init_io_cache_share(¶m->read_cache, &io_share, i); + /* + Initialize the I/O cache share for use with the read caches and, in + case of non-quick repair, the write cache. When all threads join on + the cache lock, the writer copies the write cache contents to the + read caches. + */ + if (i > 1) + { + if (rep_quick) + init_io_cache_share(¶m->read_cache, &io_share, NULL, i); + else + init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i); + } + else + io_share.total_threads= 0; /* share not used */ + (void) pthread_attr_init(&thr_attr); (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); for (i=0 ; i < sort_info.total_keys ; i++) { - sort_param[i].read_cache=param->read_cache; + /* + Copy the properly initialized IO_CACHE structure so that every + thread has its own copy. In quick mode param->read_cache is shared + for use by all threads. In non-quick mode all threads but the + first copy the shared new_data_cache, which is synchronized to the + write cache of the first thread. The first thread copies + param->read_cache, which is not shared. + */ + sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache : + new_data_cache); + DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx", + i, (long) &sort_param[i].read_cache)); + /* two approaches: the same amount of memory for each thread or the memory for the same number of keys for each thread... @@ -2694,7 +2807,10 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, (void *) (sort_param+i))) { _ma_check_print_error(param,"Cannot start a repair thread"); - remove_io_thread(¶m->read_cache); + /* Cleanup: Detach from the share. Avoid others to be blocked. */ + if (io_share.total_threads) + remove_io_thread(&sort_param[i].read_cache); + DBUG_PRINT("error", ("Cannot start a repair thread")); sort_info.got_error=1; } else @@ -2716,6 +2832,11 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (sort_param[0].fix_datafile) { + /* + Append some nuls to the end of a memory mapped file. Destroy the + write cache. The master thread did already detach from the share + by remove_io_thread() in sort.c:thr_find_all_keys(). + */ if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) goto err; if (param->testflag & T_SAFE_REPAIR) @@ -2731,8 +2852,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, sort_param->filepos; /* Only whole records */ share->state.version=(ulong) time((time_t*) 0); + /* + Exchange the data file descriptor of the table, so that we use the + new file from now on. + */ my_close(info->dfile,MYF(0)); info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } @@ -2787,7 +2913,20 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, err: got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile); + /* + Destroy the write cache. The master thread did already detach from + the share by remove_io_thread() or it was not yet started (if the + error happend before creating the thread). + */ VOID(end_io_cache(&info->rec_cache)); + /* + Destroy the new data cache in case of non-quick repair. All slave + threads did either detach from the share by remove_io_thread() + already or they were not yet started (if the error happend before + creating the threads). + */ + if (!rep_quick) + VOID(end_io_cache(&new_data_cache)); if (!got_error) { /* Replace the actual file with the temporary file */ @@ -2920,12 +3059,41 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) } /* sort_maria_ft_key_read */ - /* Read next record from file using parameters in sort_info */ - /* Return -1 if end of file, 0 if ok and > 0 if error */ +/* + Read next record from file using parameters in sort_info. + + SYNOPSIS + sort_get_next_record() + sort_param Information about and for the sort process + + NOTE + + Dynamic Records With Non-Quick Parallel Repair + + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. + + RETURN + -1 end of file + 0 ok + > 0 error +*/ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) { int searching; + int parallel_flag; uint found_record,b_type,left_length; my_off_t pos; byte *to; @@ -2963,7 +3131,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); if (*sort_param->record) { - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= (info->checksum= _ma_static_checksum(info,sort_param->record)); DBUG_RETURN(0); @@ -2978,6 +3146,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) LINT_INIT(to); pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; for (;;) { found_record=block_info.second_read= 0; @@ -3008,7 +3177,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) (byte*) block_info.header,pos, MARIA_BLOCK_INFO_HEADER_LENGTH, (! found_record ? READING_NEXT : 0) | - READING_HEADER)) + parallel_flag | READING_HEADER)) { if (found_record) { @@ -3185,9 +3354,31 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) llstr(sort_param->start_recpos,llbuff)); goto try_next; } - if (_ma_read_cache(&sort_param->read_cache,to,block_info.filepos, - block_info.data_len, - (found_record == 1 ? READING_NEXT : 0))) + /* + Copy information that is already read. Avoid accessing data + below the cache start. This could happen if the header + streched over the end of the previous buffer contents. + */ + { + uint header_len= (uint) (block_info.filepos - pos); + uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy(to, block_info.header + header_len, prefetch_len); + block_info.filepos+= prefetch_len; + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + if (block_info.data_len && + _ma_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? READING_NEXT : 0) | + parallel_flag)) { _ma_check_print_info(param, "Read error for block at: %s (error: %d); Skipped", @@ -3217,13 +3408,14 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) { if (sort_param->read_cache.error < 0) DBUG_RETURN(1); - if (info->s->calc_checksum) - info->checksum=_ma_checksum(info,sort_param->record); + if (sort_param->calc_checksum) + info->checksum= _ma_checksum(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && + sort_param->calc_checksum && test(info->s->calc_checksum))) { _ma_check_print_info(param,"Found wrong packed record at %s", @@ -3231,7 +3423,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) goto try_next; } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= info->checksum; DBUG_RETURN(0); } @@ -3258,7 +3450,8 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(1); /* Something wrong with data */ } sort_param->start_recpos=sort_param->pos; - if (_ma_pack_get_block_info(info,&block_info,-1,sort_param->pos)) + if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info, + &sort_param->rec_buff, -1, sort_param->pos)) DBUG_RETURN(-1); if (!block_info.rec_len && sort_param->pos + MEMMAP_EXTRA_MARGIN == @@ -3282,15 +3475,14 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) llstr(sort_param->pos,llbuff)); continue; } - if (_ma_pack_rec_unpack(info,sort_param->record,sort_param->rec_buff, - block_info.rec_len)) + if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record, + sort_param->rec_buff, block_info.rec_len)) { if (! searching) _ma_check_print_info(param,"Found wrong record at %s", llstr(sort_param->pos,llbuff)); continue; } - info->checksum=_ma_checksum(info,sort_param->record); if (!sort_param->fix_datafile) { sort_param->filepos=sort_param->pos; @@ -3300,8 +3492,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos=block_info.filepos+ block_info.rec_len); info->packed_length=block_info.rec_len; - if (param->calc_checksum) - param->glob_crc+= info->checksum; + if (sort_param->calc_checksum) + param->glob_crc+= (info->checksum= + _ma_checksum(info, sort_param->record)); DBUG_RETURN(0); } } @@ -3309,7 +3502,20 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } - /* Write record to new file */ +/* + Write record to new file. + + SYNOPSIS + _ma_sort_write_record() + sort_param Sort parameters. + + NOTE + This is only called by a master thread if parallel repair is used. + + RETURN + 0 OK + 1 Error +*/ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) { @@ -3358,6 +3564,7 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); } + /* We can use info->checksum here as only one thread calls this. */ info->checksum=_ma_checksum(info,sort_param->record); reclength= _ma_rec_pack(info,from,sort_param->record); flag=0; @@ -3767,7 +3974,7 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(1); } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); } error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info); diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index cf21ccb09e5..4041c101bde 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -211,7 +211,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && (my_disable_locking && share->state.open_count)))) { - DBUG_PRINT("error",("Table is marked as crashed")); + DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u " + "changed: %u open_count: %u !locking: %d", + open_flags, share->state.changed, + share->state.open_count, my_disable_locking)); my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); goto err; diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index 3be893f39f8..c29cf1a672a 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -104,7 +104,8 @@ static void fill_buffer(MARIA_BIT_BUFF *bit_buff); static uint max_bit(uint value); static uint read_pack_length(uint version, const uchar *buf, ulong *length); #ifdef HAVE_MMAP -static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, uchar *header); #endif @@ -450,13 +451,15 @@ int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, byte *buf) DBUG_RETURN(-1); /* _search() didn't find record */ file=info->dfile; - if (_ma_pack_get_block_info(info, &block_info, file, filepos)) + if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, file, filepos)) goto err; if (my_read(file,(byte*) info->rec_buff + block_info.offset , block_info.rec_len - block_info.offset, MYF(MY_NABP))) goto panic; info->update|= HA_STATE_AKTIV; - DBUG_RETURN(_ma_pack_rec_unpack(info,buf,info->rec_buff,block_info.rec_len)); + DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); panic: my_errno=HA_ERR_WRONG_IN_RECORD; err: @@ -465,8 +468,8 @@ err: -int _ma_pack_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, - ulong reclength) +int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + register byte *to, byte *from, ulong reclength) { byte *end_field; reg3 MARIA_COLUMNDEF *end; @@ -474,18 +477,18 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, MARIA_SHARE *share=info->s; DBUG_ENTER("_ma_pack_rec_unpack"); - init_bit_buffer(&info->bit_buff, (uchar*) from,reclength); + init_bit_buffer(bit_buff, (uchar*) from, reclength); for (current_field=share->rec, end=current_field+share->base.fields ; current_field < end ; current_field++,to=end_field) { end_field=to+current_field->length; - (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to, + (*current_field->unpack)(current_field, bit_buff, (uchar*) to, (uchar*) end_field); } - if (! info->bit_buff.error && - info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end) + if (!bit_buff->error && + bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) DBUG_RETURN(0); info->update&= ~HA_STATE_AKTIV; DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); @@ -1016,13 +1019,16 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, - share->pack.ref_length, skip_deleted_blocks)) + if (_ma_read_cache(&info->rec_cache, (byte*) block_info.header, + filepos, share->pack.ref_length, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; - b_type= _ma_pack_get_block_info(info,&block_info,-1, filepos); + b_type= _ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, filepos); } else - b_type= _ma_pack_get_block_info(info,&block_info,info->dfile,filepos); + b_type= _ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, info->dfile, filepos); if (b_type) goto err; /* Error code is already set */ #ifndef DBUG_OFF @@ -1035,9 +1041,9 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_ma_read_cache(&info->rec_cache,(byte*) info->rec_buff, - block_info.filepos, block_info.rec_len, - skip_deleted_blocks)) + if (_ma_read_cache(&info->rec_cache, (byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; } else @@ -1052,8 +1058,8 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, info->nextpos=block_info.filepos+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_ma_pack_rec_unpack(info,buf,info->rec_buff, - block_info.rec_len)); + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); err: DBUG_RETURN(my_errno); } @@ -1061,8 +1067,9 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, /* Read and process header from a huff-record-file */ -uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, - my_off_t filepos) +uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos) { uchar *header=info->header; uint head_length,ref_length; @@ -1087,17 +1094,17 @@ uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, head_length+= read_pack_length((uint) maria->s->pack.version, header + head_length, &info->blob_len); if (!(_ma_alloc_rec_buff(maria,info->rec_len + info->blob_len, - &maria->rec_buff))) + rec_buff_p))) return BLOCK_FATAL_ERROR; /* not enough memory */ - maria->bit_buff.blob_pos=(uchar*) maria->rec_buff+info->rec_len; - maria->bit_buff.blob_end= maria->bit_buff.blob_pos+info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len; + bit_buff->blob_end= bit_buff->blob_pos + info->blob_len; maria->blob_length=info->blob_len; } info->filepos=filepos+head_length; if (file > 0) { info->offset=min(info->rec_len, ref_length - head_length); - memcpy(maria->rec_buff, header+head_length, info->offset); + memcpy(*rec_buff_p, header + head_length, info->offset); } return 0; } @@ -1215,7 +1222,8 @@ void _ma_unmap_file(MARIA_HA *info) } -static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, uchar *header) { header+= read_pack_length((uint) maria->s->pack.version, header, @@ -1226,10 +1234,10 @@ static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, &info->blob_len); /* _ma_alloc_rec_buff sets my_errno on error */ if (!(_ma_alloc_rec_buff(maria, info->blob_len, - &maria->rec_buff))) + rec_buff_p))) return 0; /* not enough memory */ - maria->bit_buff.blob_pos=(uchar*) maria->rec_buff; - maria->bit_buff.blob_end= (uchar*) maria->rec_buff + info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p; + bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len; } return header; } @@ -1245,11 +1253,13 @@ static int _ma_read_mempack_record(MARIA_HA *info, my_off_t filepos, byte *buf) if (filepos == HA_OFFSET_ERROR) DBUG_RETURN(-1); /* _search() didn't find record */ - if (!(pos= (byte*) _ma_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) share->file_map+ filepos))) DBUG_RETURN(-1); - DBUG_RETURN(_ma_pack_rec_unpack(info, buf, pos, block_info.rec_len)); + DBUG_RETURN(_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); } @@ -1269,7 +1279,8 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info, byte *buf, my_errno=HA_ERR_END_OF_FILE; goto err; } - if (!(pos= (byte*) _ma_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) (start=share->file_map+ filepos)))) @@ -1286,7 +1297,8 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info, byte *buf, info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_ma_pack_rec_unpack(info,buf,pos, block_info.rec_len)); + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); err: DBUG_RETURN(my_errno); } diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c index 0f6883f4c9d..c837fe5e6e3 100644 --- a/storage/maria/ma_range.c +++ b/storage/maria/ma_range.c @@ -71,6 +71,21 @@ ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key, uchar * key_buff; uint start_key_len; + /* + The problem is that the optimizer doesn't support + RTree keys properly at the moment. + Hope this will be fixed some day. + But now NULL in the min_key means that we + didn't make the task for the RTree key + and expect BTree functionality from it. + As it's not able to handle such request + we return the error. + */ + if (!min_key) + { + res= HA_POS_ERROR; + break; + } key_buff= info->lastkey+info->s->base.max_key_length; start_key_len= _ma_pack_key(info,inx, key_buff, (uchar*) min_key->key, min_key->length, diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index 795bfdb7fda..c9e3a369851 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -20,6 +20,11 @@ */ #include "ma_fulltext.h" +#if defined(MSDOS) || defined(__WIN__) +#include +#else +#include +#endif #include /* static variables */ @@ -143,7 +148,8 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, skr=maxbuffer; if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer || (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/ - (sort_length+sizeof(char*))) <= 1) + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) { _ma_check_print_error(info->sort_info->param, "sort_buffer_size is to small"); @@ -304,7 +310,7 @@ static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, pthread_handler_t _ma_thr_find_all_keys(void *arg) { - MARIA_SORT_PARAM *info= (MARIA_SORT_PARAM*) arg; + MARIA_SORT_PARAM *sort_param= (MARIA_SORT_PARAM*) arg; int error; uint memavl,old_memavl,keys,sort_length; uint idx, maxbuffer; @@ -316,138 +322,163 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) if (my_thread_init()) goto err; - if (info->sort_info->got_error) - goto err; - if (info->keyinfo->flag && HA_VAR_LENGTH_KEY) - { - info->write_keys=write_keys_varlen; - info->read_to_buffer=read_to_buffer_varlen; - info->write_key=write_merge_key_varlen; - } - else - { - info->write_keys=write_keys; - info->read_to_buffer=read_to_buffer; - info->write_key=write_merge_key; - } + { /* Add extra block since DBUG_ENTER declare variables */ + DBUG_ENTER("_ma_thr_find_all_keys"); + DBUG_PRINT("enter", ("master: %d", sort_param->master)); + if (sort_param->sort_info->got_error) + goto err; - my_b_clear(&info->tempfile); - my_b_clear(&info->tempfile_for_exceptions); - bzero((char*) &info->buffpek,sizeof(info->buffpek)); - bzero((char*) &info->unique, sizeof(info->unique)); - sort_keys= (uchar **) NULL; - - memavl=max(info->sortbuff_size, MIN_SORT_MEMORY); - idx= info->sort_info->max_records; - sort_length= info->key_length; - maxbuffer= 1; - - while (memavl >= MIN_SORT_MEMORY) - { - if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= - (my_off_t) memavl) - keys= idx+1; + if (sort_param->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + sort_param->write_keys= write_keys_varlen; + sort_param->read_to_buffer= read_to_buffer_varlen; + sort_param->write_key= write_merge_key_varlen; + } else { - uint skr; - do - { - skr=maxbuffer; - if (memavl < sizeof(BUFFPEK)*maxbuffer || - (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ - (sort_length+sizeof(char*))) <= 1) - { - _ma_check_print_error(info->sort_info->param, - "sort_buffer_size is to small"); - goto err; - } - } - while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); + sort_param->write_keys= write_keys; + sort_param->read_to_buffer= read_to_buffer; + sort_param->write_key= write_merge_key; } - if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ - ((info->keyinfo->flag & HA_FULLTEXT) ? - HA_FT_MAXBYTELEN : 0), MYF(0)))) + + my_b_clear(&sort_param->tempfile); + my_b_clear(&sort_param->tempfile_for_exceptions); + bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek)); + bzero((char*) &sort_param->unique, sizeof(sort_param->unique)); + sort_keys= (uchar **) NULL; + + memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY); + idx= sort_param->sort_info->max_records; + sort_length= sort_param->key_length; + maxbuffer= 1; + + while (memavl >= MIN_SORT_MEMORY) { - if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK), - maxbuffer, maxbuffer/2)) - { - my_free((gptr) sort_keys,MYF(0)); - sort_keys= (uchar **) NULL; /* for err: label */ - } + if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= idx+1; else - break; + { + uint skr; + do + { + skr= maxbuffer; + if (memavl < sizeof(BUFFPEK)*maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) + { + _ma_check_print_error(sort_param->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); + } + if ((sort_keys= (uchar **) + my_malloc(keys*(sort_length+sizeof(char*))+ + ((sort_param->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) + { + if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK), + maxbuffer, maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= (uchar **) NULL; /* for err: label */ + } + else + break; + } + old_memavl= memavl; + if ((memavl= memavl/4*3) < MIN_SORT_MEMORY && + old_memavl > MIN_SORT_MEMORY) + memavl= MIN_SORT_MEMORY; } - old_memavl=memavl; - if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY) - memavl=MIN_SORT_MEMORY; - } - if (memavl < MIN_SORT_MEMORY) - { - _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ - goto err; /* purecov: tested */ - } - - if (info->sort_info->param->testflag & T_VERBOSE) - printf("Key %d - Allocating buffer for %d keys\n",info->key+1,keys); - info->sort_keys=sort_keys; - - idx=error=0; - sort_keys[0]=(uchar*) (sort_keys+keys); - - while (!(error=info->sort_info->got_error) && - !(error=(*info->key_read)(info,sort_keys[idx]))) - { - if (info->real_key_length > info->key_length) + if (memavl < MIN_SORT_MEMORY) { - if (write_key(info,sort_keys[idx], &info->tempfile_for_exceptions)) - goto err; - continue; + _ma_check_print_error(sort_param->sort_info->param, + "Sort buffer too small"); + goto err; /* purecov: tested */ } - if (++idx == keys) + if (sort_param->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n", + sort_param->key+1, keys); + sort_param->sort_keys= sort_keys; + + idx= error= 0; + sort_keys[0]= (uchar*) (sort_keys+keys); + + DBUG_PRINT("info", ("reading keys")); + while (!(error= sort_param->sort_info->got_error) && + !(error= (*sort_param->key_read)(sort_param, sort_keys[idx]))) { - if (info->write_keys(info,sort_keys,idx-1, - (BUFFPEK *)alloc_dynamic(&info->buffpek), - &info->tempfile)) - goto err; - sort_keys[0]=(uchar*) (sort_keys+keys); - memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); - idx=1; + if (sort_param->real_key_length > sort_param->key_length) + { + if (write_key(sort_param,sort_keys[idx], + &sort_param->tempfile_for_exceptions)) + goto err; + continue; + } + + if (++idx == keys) + { + if (sort_param->write_keys(sort_param, sort_keys, idx - 1, + (BUFFPEK *)alloc_dynamic(&sort_param->buffpek), + &sort_param->tempfile)) + goto err; + sort_keys[0]= (uchar*) (sort_keys+keys); + memcpy(sort_keys[0], sort_keys[idx - 1], (size_t) sort_param->key_length); + idx= 1; + } + sort_keys[idx]=sort_keys[idx - 1] + sort_param->key_length; } - sort_keys[idx]=sort_keys[idx-1]+info->key_length; - } - if (error > 0) - goto err; - if (info->buffpek.elements) - { - if (info->write_keys(info,sort_keys, idx, - (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) + if (error > 0) goto err; - info->keys=(info->buffpek.elements-1)*(keys-1)+idx; - } - else - info->keys=idx; + if (sort_param->buffpek.elements) + { + if (sort_param->write_keys(sort_param,sort_keys, idx, + (BUFFPEK *) alloc_dynamic(&sort_param->buffpek), &sort_param->tempfile)) + goto err; + sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx; + } + else + sort_param->keys= idx; - info->sort_keys_length=keys; - goto ok; + sort_param->sort_keys_length= keys; + goto ok; err: - info->sort_info->got_error=1; /* no need to protect this with a mutex */ - if (sort_keys) - my_free((gptr) sort_keys,MYF(0)); - info->sort_keys=0; - delete_dynamic(& info->buffpek); - close_cached_file(&info->tempfile); - close_cached_file(&info->tempfile_for_exceptions); + DBUG_PRINT("error", ("got some error")); + sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */ + if (sort_keys) + my_free((gptr) sort_keys,MYF(0)); + sort_param->sort_keys=0; + delete_dynamic(& sort_param->buffpek); + close_cached_file(&sort_param->tempfile); + close_cached_file(&sort_param->tempfile_for_exceptions); ok: - free_root(&info->wordroot, MYF(0)); - remove_io_thread(&info->read_cache); - pthread_mutex_lock(&info->sort_info->mutex); - info->sort_info->threads_running--; - pthread_cond_signal(&info->sort_info->cond); - pthread_mutex_unlock(&info->sort_info->mutex); + free_root(&sort_param->wordroot, MYF(0)); + /* + Detach from the share if the writer is involved. Avoid others to + be blocked. This includes a flush of the write buffer. This will + also indicate EOF to the readers. + */ + if (sort_param->sort_info->info->rec_cache.share) + remove_io_thread(&sort_param->sort_info->info->rec_cache); + + /* Readers detach from the share if any. Avoid others to be blocked. */ + if (sort_param->read_cache.share) + remove_io_thread(&sort_param->read_cache); + + pthread_mutex_lock(&sort_param->sort_info->mutex); + if (!--sort_param->sort_info->threads_running) + pthread_cond_signal(&sort_param->sort_info->cond); + pthread_mutex_unlock(&sort_param->sort_info->mutex); + DBUG_PRINT("exit", ("======== ending thread ========")); + } my_thread_end(); return NULL; } @@ -465,6 +496,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) MARIA_SHARE *share=info->s; MARIA_SORT_PARAM *sinfo; byte *mergebuf=0; + DBUG_ENTER("_ma_thr_write_keys"); LINT_INIT(length); for (i= 0, sinfo= sort_param ; @@ -474,6 +506,8 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) if (!sinfo->sort_keys) { got_error=1; + my_free(_ma_get_rec_buff_ptr(info, sinfo->rec_buff), + MYF(MY_ALLOW_ZERO_PTR)); continue; } if (!got_error) @@ -513,7 +547,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) { if (got_error) continue; - if (sinfo->keyinfo->flag && HA_VAR_LENGTH_KEY) + if (sinfo->keyinfo->flag & HA_VAR_LENGTH_KEY) { sinfo->write_keys=write_keys_varlen; sinfo->read_to_buffer=read_to_buffer_varlen; @@ -602,7 +636,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) } } my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); - return got_error; + DBUG_RETURN(got_error); } #endif /* THREAD */ diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 62c50187888..5fb25512d4c 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -69,6 +69,7 @@ typedef struct st_maria_state_info ulong update_count; /* Updated for each write lock */ ulong status; ulong *rec_per_key_part; + ha_checksum checksum; /* Table checksum */ my_off_t *key_root; /* Start of key trees */ my_off_t *key_del; /* delete links for trees */ my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ @@ -179,6 +180,7 @@ typedef struct st_maria_share */ MARIA_DECODE_TREE *decode_trees; uint16 *decode_tables; + /* Function to use for a row checksum. */ int(*read_record) (struct st_maria_info *, my_off_t, byte *); int(*write_record) (struct st_maria_info *, const byte *); int(*update_record) (struct st_maria_info *, my_off_t, const byte *); @@ -230,16 +232,6 @@ typedef struct st_maria_share } MARIA_SHARE; -typedef uint maria_bit_type; - -typedef struct st_maria_bit_buff -{ /* Used for packing of record */ - maria_bit_type current_byte; - uint bits; - uchar *pos, *end, *blob_pos, *blob_end; - uint error; -} MARIA_BIT_BUFF; - struct st_maria_info { MARIA_SHARE *s; /* Shared between open:s */ @@ -273,7 +265,7 @@ struct st_maria_info my_off_t last_keypage; /* Last key page read */ my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; - ha_checksum checksum; + ha_checksum checksum; /* Temp storage for row checksum */ /* QQ: the folloing two xxx_length fields should be removed, as they are not compatible with parallel repair @@ -354,8 +346,15 @@ struct st_maria_info #define maria_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ mi_int2store(x,boh); } #define _ma_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) -#define maria_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED -#define maria_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } +#define maria_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ + DBUG_PRINT("error", ("Marked table crashed")); \ + }while(0) +#define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", \ + ("Marked table crashed")); \ + }while(0) #define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) #define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) #define maria_print_error(SHARE, ERRNO) \ @@ -608,8 +607,8 @@ extern my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys); extern int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, byte *buf); extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, my_off_t, my_bool); -extern int _ma_pack_rec_unpack(MARIA_HA *info, byte *to, byte *from, - ulong reclength); +extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *from, ulong reclength); extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b); extern int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, const byte *oldrec, const byte *newrec, @@ -663,8 +662,9 @@ typedef struct st_maria_block_info extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); -extern uint _ma_pack_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, - my_off_t); +extern uint _ma_pack_get_block_info(MARIA_HA *mari, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos); extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); extern void _ma_report_error(int errcode, const char *file_name); extern my_bool _ma_memmap_file(MARIA_HA *info); diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index c5a53b1ffac..5a5aaf037f0 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -1967,7 +1967,7 @@ static char *bindigits(ulonglong value, uint bits) DBUG_ASSERT(idx < sizeof(digits)); while (idx) - *(ptr++)= '0' + ((value >> (--idx)) & 1); + *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1); *ptr= '\0'; return digits; } @@ -1997,7 +1997,7 @@ static char *hexdigits(ulonglong value) DBUG_ASSERT(idx < sizeof(digits)); while (idx) { - if ((*(ptr++)= '0' + ((value >> (4 * (--idx))) & 0xf)) > '9') + if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9') *(ptr - 1)+= 'a' - '9' - 1; } *ptr= '\0'; @@ -2286,7 +2286,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) errors++; break; } - idx+= code & 1; + idx+= (uint) code & 1; if (idx >= length) { VOID(fflush(stdout)); diff --git a/storage/maria/plug.in b/storage/maria/plug.in index a9b35aefbfb..198f5d8c289 100644 --- a/storage/maria/plug.in +++ b/storage/maria/plug.in @@ -5,3 +5,4 @@ MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)]) MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) # Maria will probably go first into max builds, not all builds, # so we don't declare it mandatory. +MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc]) diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index d579213d7f9..be27c13dbe5 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -84,6 +84,7 @@ typedef struct st_mi_state_info time_t recover_time; /* Time for last recover */ time_t check_time; /* Time for last check */ uint sortkey; /* sorted by this key (not used) */ + uint open_count; uint8 changed; /* Changed since myisamchk */ /* the following isn't saved on disk */ @@ -224,16 +225,6 @@ typedef struct st_mi_isam_share } MYISAM_SHARE; -typedef uint mi_bit_type; - -typedef struct st_mi_bit_buff -{ /* Used for packing of record */ - mi_bit_type current_byte; - uint bits; - uchar *pos, *end, *blob_pos, *blob_end; - uint error; -} MI_BIT_BUFF; - struct st_myisam_info { MYISAM_SHARE *s; /* Shared between open:s */ @@ -309,6 +300,7 @@ struct st_myisam_info uchar *rtree_recursion_state; /* For RTREE */ int rtree_recursion_depth; }; + #define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ #define F_EXTRA_LCK -1 /* bits in opt_flag */ diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index be68ffbdc5a..d67fa8fa918 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -1105,16 +1105,16 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) my_off_t total_count; char llbuf[32]; - DBUG_PRINT("info", ("column: %3u", count - huff_counts + 1)); + DBUG_PRINT("info", ("column: %3lu", count - huff_counts + 1)); if (verbose >= 2) - VOID(printf("column: %3u\n", count - huff_counts + 1)); + VOID(printf("column: %3lu\n", count - huff_counts + 1)); if (count->tree_buff) { - DBUG_PRINT("info", ("number of distinct values: %u", + DBUG_PRINT("info", ("number of distinct values: %lu", (count->tree_pos - count->tree_buff) / count->field_length)); if (verbose >= 2) - VOID(printf("number of distinct values: %u\n", + VOID(printf("number of distinct values: %lu\n", (count->tree_pos - count->tree_buff) / count->field_length)); } diff --git a/unittest/mysys/mf_pagecache_consist.c b/unittest/mysys/mf_pagecache_consist.c index f7724222a09..95c2135edad 100755 --- a/unittest/mysys/mf_pagecache_consist.c +++ b/unittest/mysys/mf_pagecache_consist.c @@ -332,14 +332,6 @@ int main(int argc, char **argv __attribute__((unused))) exit(1); } -#ifndef pthread_attr_setstacksize /* void return value */ - if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) - { - fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", - error,errno); - exit(1); - } -#endif #ifdef HAVE_THR_SETCONCURRENCY VOID(thr_setconcurrency(2)); #endif diff --git a/unittest/mysys/mf_pagecache_single.c b/unittest/mysys/mf_pagecache_single.c index 49ecd2986ab..9c467343156 100644 --- a/unittest/mysys/mf_pagecache_single.c +++ b/unittest/mysys/mf_pagecache_single.c @@ -511,14 +511,6 @@ int main(int argc, char **argv __attribute__((unused))) exit(1); } -#ifndef pthread_attr_setstacksize /* void return value */ - if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) - { - fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", - error,errno); - exit(1); - } -#endif #ifdef HAVE_THR_SETCONCURRENCY VOID(thr_setconcurrency(2)); #endif From 714f3b73e513f2d12fb45e8256fa6299e60cd5a2 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Dec 2006 18:58:35 +0100 Subject: [PATCH 81/95] merge of recent MyISAM changes into Maria. Only failure is ndb_restore - could have been worse!! include/pagecache.h: LSN->lsn mysys/mf_keycache.c: page_status is int mysys/mf_pagecache.c: merge of recent key cache changes sql/mysqld.cc: post-merge fixes sql/set_var.cc: post-merge fixes storage/maria/ha_maria.cc: merge of recent MyISAM changes into Maria storage/maria/ha_maria.h: merge of recent MyISAM changes into Maria storage/maria/ma_close.c: merge of recent MyISAM changes into Maria storage/maria/ma_create.c: merge of recent MyISAM changes into Maria storage/maria/ma_delete.c: merge of recent MyISAM changes into Maria storage/maria/ma_dynrec.c: merge of recent MyISAM changes into Maria storage/maria/ma_ft_boolean_search.c: merge of recent MyISAM changes into Maria storage/maria/ma_key.c: merge of recent MyISAM changes into Maria storage/maria/ma_keycache.c: merge of recent MyISAM changes into Maria storage/maria/ma_open.c: merge of recent MyISAM changes into Maria storage/maria/ma_page.c: merge of recent MyISAM changes into Maria storage/maria/ma_rsamepos.c: merge of recent MyISAM changes into Maria storage/maria/ma_statrec.c: merge of recent MyISAM changes into Maria storage/maria/ma_unique.c: merge of recent MyISAM changes into Maria storage/maria/maria_chk.c: merge of recent MyISAM changes into Maria storage/maria/maria_pack.c: merge of recent MyISAM changes into Maria storage/myisam/myisampack.c: compiler warning --- include/pagecache.h | 2 +- mysys/mf_keycache.c | 4 +-- mysys/mf_pagecache.c | 54 ++++++++++++++-------------- sql/mysqld.cc | 2 ++ sql/set_var.cc | 2 ++ storage/maria/ha_maria.cc | 18 ++++++---- storage/maria/ha_maria.h | 2 +- storage/maria/ma_close.c | 5 +-- storage/maria/ma_create.c | 6 ++-- storage/maria/ma_delete.c | 9 ++--- storage/maria/ma_dynrec.c | 4 +-- storage/maria/ma_ft_boolean_search.c | 1 - storage/maria/ma_key.c | 15 ++++---- storage/maria/ma_keycache.c | 4 +-- storage/maria/ma_open.c | 23 ++++++++++-- storage/maria/ma_page.c | 4 +-- storage/maria/ma_rsamepos.c | 3 +- storage/maria/ma_statrec.c | 4 +-- storage/maria/ma_unique.c | 4 +-- storage/maria/maria_chk.c | 1 + storage/maria/maria_pack.c | 36 ++++++++++--------- storage/myisam/myisampack.c | 4 +-- 22 files changed, 121 insertions(+), 86 deletions(-) diff --git a/include/pagecache.h b/include/pagecache.h index 9f215325ae5..418e0203f64 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -221,7 +221,7 @@ extern my_bool pagecache_delete_page(PAGECACHE *pagecache, enum pagecache_page_lock lock, my_bool flush); extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); -extern my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, +extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, LEX_STRING *str, LSN *max_lsn); diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 111a28d01a4..12be01184b6 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1642,9 +1642,9 @@ restart: KEYCACHE_DBUG_ASSERT(page_status != -1); *page_st=page_status; KEYCACHE_DBUG_PRINT("find_key_block", - ("fd: %d pos: %lu block->status: %u page_status: %u", + ("fd: %d pos: %lu block->status: %u page_status: %d", file, (ulong) filepos, block->status, - (uint) page_status)); + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_keycache2", diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 96c855fda0a..5f4e8d1a97d 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -759,9 +759,9 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, DBUG_PRINT("exit", ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", - pagecache->disk_blocks, pagecache->block_root, - pagecache->hash_entries, pagecache->hash_root, - pagecache->hash_links, pagecache->hash_link_root)); + pagecache->disk_blocks, (long) pagecache->block_root, + pagecache->hash_entries, (long) pagecache->hash_root, + pagecache->hash_links, (long) pagecache->hash_link_root)); bzero((gptr) pagecache->changed_blocks, sizeof(pagecache->changed_blocks[0]) * PAGECACHE_CHANGED_BLOCKS_HASH); @@ -985,7 +985,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) { DBUG_ENTER("end_pagecache"); - DBUG_PRINT("enter", ("key_cache: 0x%lx", pagecache)); + DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache)); if (!pagecache->inited) DBUG_VOID_RETURN; @@ -1004,7 +1004,7 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) pagecache->blocks_changed= 0; } - DBUG_PRINT("status", ("used: %d changed: %d w_requests: %lu " + DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu " "writes: %lu r_requests: %lu reads: %lu", pagecache->blocks_used, pagecache->global_blocks_changed, (ulong) pagecache->global_cache_w_requests, @@ -1466,7 +1466,7 @@ static void unreg_request(PAGECACHE *pagecache, if (block->temperature == BLOCK_WARM) pagecache->warm_blocks--; block->temperature= BLOCK_HOT; - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", pagecache->warm_blocks)); } link_block(pagecache, block, hot, (my_bool)at_end); @@ -1485,7 +1485,7 @@ static void unreg_request(PAGECACHE *pagecache, pagecache->warm_blocks++; block->temperature= BLOCK_WARM; } - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", pagecache->warm_blocks)); } } @@ -1789,11 +1789,11 @@ static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, DBUG_ENTER("find_key_block"); KEYCACHE_THREAD_TRACE("find_key_block:begin"); - DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, (uint) wrmode)); - KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, - (uint) wrmode)); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, wrmode)); + KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, + wrmode)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", test_key_cache(pagecache, "start of find_key_block", 0);); @@ -2103,14 +2103,14 @@ restart: KEYCACHE_DBUG_ASSERT(page_status != -1); *page_st=page_status; DBUG_PRINT("info", - ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %u", (ulong) block, (uint) file->file, (ulong) pageno, block->status, (uint) page_status)); KEYCACHE_DBUG_PRINT("find_key_block", - ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", (ulong) block, - (uint) file->file, (ulong) pageno, block->status, - (uint) page_status)); + file->file, (ulong) pageno, block->status, + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", @@ -3502,7 +3502,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; int last_errno= 0; DBUG_ENTER("flush_pagecache_blocks_int"); - DBUG_PRINT("enter",("file: %d blocks_used: %d blocks_changed: %d", + DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", file->file, pagecache->blocks_used, pagecache->blocks_changed)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) @@ -3714,7 +3714,7 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, { int res; DBUG_ENTER("flush_pagecache_blocks"); - DBUG_PRINT("enter", ("pagecache: 0x%lx", pagecache)); + DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache)); if (pagecache->disk_blocks <= 0) DBUG_RETURN(0); @@ -3800,7 +3800,7 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) of type PAGECACHE_LSN_PAGE. SYNOPSIS - pagecache_collect_changed_blocks_with_LSN() + pagecache_collect_changed_blocks_with_lsn() pagecache pointer to the page cache str (OUT) pointer to a LEX_STRING where the allocated buffer, and its size, will be put @@ -3809,7 +3809,8 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) DESCRIPTION Does the allocation because the caller cannot know the size itself. - Memory freeing is done by the caller. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they are not interesting for a checkpoint record. The caller has the intention of doing checkpoints. @@ -3818,17 +3819,18 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) 0 on success 1 on error */ -my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, +my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, LEX_STRING *str, LSN *max_lsn) { my_bool error; - ulong stored_LRD_size= 0; + ulong stored_list_size= 0; uint file_hash; char *ptr; DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); *max_lsn= 0; + DBUG_ASSERT(NULL == str->str); /* We lock the entire cache but will be quick, just reading/writing a few MBs of memory at most. @@ -3879,17 +3881,17 @@ my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, DBUG_ASSERT(0); goto err; } - stored_LRD_size++; + stored_list_size++; } } - str->length= 8+(4+4+8)*stored_LRD_size; + str->length= 8+(4+4+8)*stored_list_size; if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) goto err; ptr= str->str; - int8store(ptr, stored_LRD_size); + int8store(ptr, stored_list_size); ptr+= 8; - if (0 == stored_LRD_size) + if (0 == stored_list_size) goto end; for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8b54efb1199..22c1b6937fb 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -8187,11 +8187,13 @@ void refresh_status(THD *thd) #undef have_innodb #undef have_ndbcluster #undef have_csv_db +#undef have_maria_db SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_maria_db= SHOW_OPTION_NO; #ifndef WITH_INNOBASE_STORAGE_ENGINE uint innobase_flush_log_at_trx_commit; diff --git a/sql/set_var.cc b/sql/set_var.cc index c72fe6fd6fe..bd68863ff97 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -674,6 +674,7 @@ sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db); sys_var_have_variable sys_have_dlopen("have_dynamic_loading", &have_dlopen); sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_maria_db("have_maria", &have_maria_db); sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); sys_var_have_variable sys_have_partition_db("have_partitioning", @@ -800,6 +801,7 @@ SHOW_VAR init_vars[]= { {sys_have_dlopen.name, (char*) &have_dlopen, SHOW_HAVE}, {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_maria_db.name, (char*) &have_maria_db, SHOW_HAVE}, {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 56f33693528..13f468b41aa 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -635,7 +635,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) { param.testflag &= ~T_RETRY_WITHOUT_QUICK; sql_print_information("Retrying repair of: '%s' without quick", - table->s->path); + table->s->path.str); continue; } param.testflag &= ~T_QUICK; @@ -643,7 +643,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) { param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP; sql_print_information("Retrying repair of: '%s' with keycache", - table->s->path); + table->s->path.str); continue; } break; @@ -654,7 +654,8 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) char llbuff[22], llbuff2[22]; sql_print_information("Found %s of %s rows when repairing '%s'", llstr(file->state->records, llbuff), - llstr(start_records, llbuff2), table->s->path); + llstr(start_records, llbuff2), + table->s->path.str); } return error; } @@ -1183,7 +1184,7 @@ bool ha_maria::check_and_repair(THD *thd) // Don't use quick if deleted rows if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK)) check_opt.flags |= T_QUICK; - sql_print_warning("Checking table: '%s'", table->s->path); + sql_print_warning("Checking table: '%s'", table->s->path.str); old_query= thd->query; old_query_length= thd->query_length; @@ -1194,7 +1195,7 @@ bool ha_maria::check_and_repair(THD *thd) if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt)) { - sql_print_warning("Recovering table: '%s'", table->s->path); + sql_print_warning("Recovering table: '%s'", table->s->path.str); check_opt.flags= ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) | (marked_crashed ? 0 : T_QUICK) | @@ -1506,6 +1507,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg, bool found_real_auto_increment= 0; enum ha_base_keytype type; char buff[FN_REFLEN]; + byte *record; KEY *pos; MARIA_KEYDEF *keydef; MARIA_COLUMNDEF *recinfo, *recinfo_pos; @@ -1608,6 +1610,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg, found_real_auto_increment= share->next_number_key_offset == 0; } + record= table_arg->record[0]; recpos= 0; recinfo_pos= recinfo; while (recpos < (uint) share->reclength) @@ -1618,7 +1621,8 @@ int ha_maria::create(const char *name, register TABLE *table_arg, for (field= table_arg->field; *field; field++) { - if ((fieldpos= (*field)->offset()) >= recpos && fieldpos <= minpos) + if ((fieldpos=(*field)->offset(record)) >= recpos && + fieldpos <= minpos) { /* skip null fields */ if (!(temp_length= (*field)->pack_length_in_rec())) @@ -1633,7 +1637,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg, } } DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d", - found, recpos, minpos, length)); + (long) found, recpos, minpos, length)); if (recpos != minpos) { // Reserved space (Null bits?) bzero((char*) recinfo_pos, sizeof(*recinfo_pos)); diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 52f289a7428..1f243f9ec59 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -36,7 +36,7 @@ extern ulong maria_recover_options; class ha_maria :public handler { MARIA_HA *file; - ulong int_table_flags; + ulonglong int_table_flags; char *data_file_name, *index_file_name; bool can_enable_indexes; int repair(THD * thd, HA_CHECK ¶m, bool optimize); diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 73764cf444a..5d7b24cc314 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -28,8 +28,9 @@ int maria_close(register MARIA_HA *info) int error=0,flag; MARIA_SHARE *share=info->s; DBUG_ENTER("maria_close"); - DBUG_PRINT("enter",("base: %lx reopen: %u locks: %u", - info,(uint) share->reopen, (uint) share->tot_locks)); + DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", + (long) info, (uint) share->reopen, + (uint) share->tot_locks)); pthread_mutex_lock(&THR_LOCK_maria); if (info->lock_type == F_EXTRA_LCK) diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 76942e3d5e8..d99c4fcc26b 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -441,9 +441,9 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, block_length= max(block_length, MARIA_MIN_KEY_BLOCK_LENGTH); block_length= min(block_length, MARIA_MAX_KEY_BLOCK_LENGTH); - keydef->block_length= MARIA_BLOCK_SIZE(length-real_length_diff, - pointer,MARIA_MAX_KEYPTR_SIZE, - block_length); + keydef->block_length= (uint16) MARIA_BLOCK_SIZE(length-real_length_diff, + pointer,MARIA_MAX_KEYPTR_SIZE, + block_length); if (keydef->block_length > MARIA_MAX_KEY_BLOCK_LENGTH || length >= HA_MAX_KEY_BUFF) { diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 8849c89e30c..f0295a3c413 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -163,7 +163,7 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, DBUG_PRINT("error",("Couldn't allocate memory")); DBUG_RETURN(my_errno=ENOMEM); } - DBUG_PRINT("info",("root_page: %ld",old_root)); + DBUG_PRINT("info",("root_page: %ld", (long) old_root)); if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0)) { error= -1; @@ -406,7 +406,7 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *k MARIA_SHARE *share=info->s; MARIA_KEY_PARAM s_temp; DBUG_ENTER("del"); - DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", leaf_page, + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page, (ulong) keypos)); DBUG_DUMP("leaf_buff",(byte*) leaf_buff,maria_getint(leaf_buff)); @@ -593,7 +593,8 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, else { /* Page is full */ endpos=anc_buff+anc_length; - DBUG_PRINT("test",("anc_buff: %lx endpos: %lx",anc_buff,endpos)); + DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx", + (long) anc_buff, (long) endpos)); if (keypos != anc_buff+2+key_reflength && !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length)) goto err; @@ -771,7 +772,7 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, int s_length; uchar *start; DBUG_ENTER("remove_key"); - DBUG_PRINT("enter",("keypos: %lx page_end: %lx",keypos,page_end)); + DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end)); start=keypos; if (!(keyinfo->flag & diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 253a538861a..e0f87addb43 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1240,8 +1240,8 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, err: my_errno= HA_ERR_WRONG_IN_RECORD; - DBUG_PRINT("error",("to_end: %lx -> %lx from_end: %lx -> %lx", - to,to_end,from,from_end)); + DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx", + (long) to, (long) to_end, (long) from, (long) from_end)); DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); DBUG_RETURN(MY_FILE_ERROR); } /* _ma_rec_unpack */ diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c index 83901cb5e47..24b51ef469c 100644 --- a/storage/maria/ma_ft_boolean_search.c +++ b/storage/maria/ma_ft_boolean_search.c @@ -333,7 +333,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) MARIA_HA *info=ftb->info; uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength; byte *lastkey_buf=ftbw->word+ftbw->off; - LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) lastkey_buf+=ftbw->len; diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index ecd51f5dc92..78465ca729f 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -52,7 +52,7 @@ static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record); uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, const byte *record, my_off_t filepos) { - byte *pos,*end; + byte *pos; uchar *start; reg1 HA_KEYSEG *keyseg; my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; @@ -107,18 +107,17 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, } if (keyseg->flag & HA_SPACE_PACK) { - end= pos + length; if (type != HA_KEYTYPE_NUM) { - while (end > pos && end[-1] == ' ') - end--; + length= cs->cset->lengthsp(cs, pos, length); } else { + byte *end= pos + length; while (pos < end && pos[0] == ' ') pos++; + length=(uint) (end-pos); } - length=(uint) (end-pos); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); memcpy((byte*) key,(byte*) pos,(size_t) char_length); @@ -403,8 +402,10 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, pos= record+keyseg->start; if (keyseg->type != (int) HA_KEYTYPE_NUM) { - memcpy(pos,key,(size_t) length); - bfill(pos+length,keyseg->length-length,' '); + memcpy(pos,key,(size_t) length); + keyseg->charset->cset->fill(keyseg->charset, + pos + length, keyseg->length - length, + ' '); } else { diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c index 837b0fbac66..4f150d905ed 100644 --- a/storage/maria/ma_keycache.c +++ b/storage/maria/ma_keycache.c @@ -54,8 +54,8 @@ int maria_assign_to_key_cache(MARIA_HA *info, int error= 0; MARIA_SHARE* share= info->s; DBUG_ENTER("maria_assign_to_key_cache"); - DBUG_PRINT("enter",("old_key_cache_handle: %lx new_key_cache_handle: %lx", - share->key_cache, key_cache)); + DBUG_PRINT("enter",("old_key_cache_handle: 0x%lx new_key_cache_handle: 0x%lx", + (long) share->key_cache, (long) key_cache)); /* Skip operation if we didn't change key cache. This can happen if we diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 4041c101bde..ab38d2baea1 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -340,6 +340,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) goto err; } } + else if (pos->type == HA_KEYTYPE_BINARY) + pos->charset= &my_charset_bin; } if (share->keyinfo[i].flag & HA_SPATIAL) { @@ -1259,12 +1261,29 @@ int maria_enable_indexes(MARIA_HA *info) RETURN 0 indexes are not disabled 1 all indexes are disabled - [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] + 2 non-unique indexes are disabled */ int maria_indexes_are_disabled(MARIA_HA *info) { MARIA_SHARE *share= info->s; - return (! maria_is_any_key_active(share->state.key_map) && share->base.keys); + /* + No keys or all are enabled. keys is the number of keys. Left shifted + gives us only one bit set. When decreased by one, gives us all all bits + up to this one set and it gets unset. + */ + if (!share->base.keys || + (maria_is_all_keys_active(share->state.key_map, share->base.keys))) + return 0; + + /* All are disabled */ + if (maria_is_any_key_active(share->state.key_map)) + return 1; + + /* + We have keys. Some enabled, some disabled. + Don't check for any non-unique disabled but return directly 2 + */ + return 2; } diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index 054b8e16468..e5f9f47eaa5 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -27,7 +27,7 @@ uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *tmp; uint page_size; DBUG_ENTER("_ma_fetch_keypage"); - DBUG_PRINT("enter",("page: %ld",page)); + DBUG_PRINT("enter",("page: %ld", (long) page)); tmp=(uchar*) key_cache_read(info->s->key_cache, info->s->kfile, page, level, (byte*) buff, @@ -80,7 +80,7 @@ int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, my_errno=EINVAL; DBUG_RETURN((-1)); } - DBUG_PRINT("page",("write page at: %lu",(long) page,buff)); + DBUG_PRINT("page",("write page at: %lu",(long) page)); DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); #endif diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c index 09861c03c32..b49ef8d294a 100644 --- a/storage/maria/ma_rsamepos.c +++ b/storage/maria/ma_rsamepos.c @@ -33,7 +33,8 @@ int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, my_off_t filepos DBUG_ENTER("maria_rsame_with_pos"); DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); - if (inx < -1 || (inx >= 0 && !maria_is_key_active(info->s->state.key_map, inx))) + if (inx < -1 || + (inx >= 0 && ! maria_is_key_active(info->s->state.key_map, inx))) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c index 0aef24f40a9..82e31115ec7 100644 --- a/storage/maria/ma_statrec.c +++ b/storage/maria/ma_statrec.c @@ -251,8 +251,8 @@ int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, if (filepos >= info->state->data_file_length) { DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld", - filepos/share->base.reclength,filepos, - info->state->records, info->state->del)); + (long) filepos/share->base.reclength, (long) filepos, + (long) info->state->records, (long) info->state->del)); fast_ma_writeinfo(info); DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE); } diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index bc1aa71966b..e2c7ca3c80c 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -57,8 +57,8 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey, MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - memcmp((char*) info->lastkey, (char*) key_buff, - MARIA_UNIQUE_HASH_LENGTH)) + bcmp((char*) info->lastkey, (char*) key_buff, + MARIA_UNIQUE_HASH_LENGTH)) { info->page_changed=1; /* Can't optimize read next */ info->lastpos=lastpos; diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 89858ee2d07..6ba5200918e 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -701,6 +701,7 @@ get_one_option(int optid, { int method; enum_handler_stats_method method_conv; + LINT_INIT(method_conv); maria_stats_method_str= argument; if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0) { diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index 5a5aaf037f0..24de45a89bc 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -1107,18 +1107,18 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) my_off_t total_count; char llbuf[32]; - DBUG_PRINT("info", ("column: %3lu", count - huff_counts + 1)); + DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1))); if (verbose >= 2) - VOID(printf("column: %3lu\n", count - huff_counts + 1)); + VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1))); if (count->tree_buff) { - DBUG_PRINT("info", ("number of distinct values: %lu", - (count->tree_pos - count->tree_buff) / - count->field_length)); + DBUG_PRINT("info", ("number of distinct values: %u", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); if (verbose >= 2) - VOID(printf("number of distinct values: %lu\n", - (count->tree_pos - count->tree_buff) / - count->field_length)); + VOID(printf("number of distinct values: %u\n", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); } total_count= 0; for (idx= 0; idx < 256; idx++) @@ -2038,7 +2038,7 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) uint huff_tree_bits; huff_tree_bits=max_bit(trees ? trees-1 : 0); - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); DBUG_PRINT("info", ("column types:")); DBUG_PRINT("info", ("FIELD_NORMAL 0")); DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); @@ -2050,12 +2050,12 @@ static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) DBUG_PRINT("info", ("FIELD_ZERO 7")); DBUG_PRINT("info", ("FIELD_VARCHAR 8")); DBUG_PRINT("info", ("FIELD_CHECK 9")); - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); DBUG_PRINT("info", ("pack type as a set of flags:")); DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); if (verbose >= 2) { VOID(printf("\n")); @@ -2128,7 +2128,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) return 0; } - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); if (verbose >= 2) VOID(printf("\n")); tree_no= 0; @@ -2139,7 +2139,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) if (huff_tree->tree_number == 0) continue; /* Deleted tree */ tree_no++; - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); if (verbose >= 3) VOID(printf("\n")); /* Count the total number of elements (byte codes or column values). */ @@ -2281,8 +2281,8 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) if (bits > 8 * sizeof(code)) { VOID(fflush(stdout)); - VOID(fprintf(stderr, "error: Huffman code too long: %u/%lu\n", - bits, 8 * sizeof(code))); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, (uint) (8 * sizeof(code)))); errors++; break; } @@ -2331,7 +2331,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) } flush_bits(); } - DBUG_PRINT("info", ("")); + DBUG_PRINT("info", (" ")); if (verbose >= 2) VOID(printf("\n")); my_afree((gptr) packed_tree); @@ -2509,7 +2509,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) end_pos-=count->max_zero_fill; field_length-=count->max_zero_fill; - switch(count->field_type) { + switch (count->field_type) { case FIELD_SKIP_ZERO: if (!memcmp((byte*) start_pos,zero_string,field_length)) { @@ -2925,6 +2925,8 @@ static void flush_bits(void) bits-= 8; *file_buffer.pos++= (uchar) (bit_buffer >> bits); } + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); file_buffer.bits= BITS_SAVED; file_buffer.bitbucket= 0; } diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index 623d65f7ce3..2753ed5f7b2 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -1110,11 +1110,11 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1))); if (count->tree_buff) { - DBUG_PRINT("info", ("number of distinct values: %lu", + DBUG_PRINT("info", ("number of distinct values: %u", (uint) ((count->tree_pos - count->tree_buff) / count->field_length))); if (verbose >= 2) - VOID(printf("number of distinct values: %lu\n", + VOID(printf("number of distinct values: %u\n", (uint) ((count->tree_pos - count->tree_buff) / count->field_length))); } From 649b3b46055594e77cadabe5768b762b8aa486da Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Dec 2006 19:01:07 +0100 Subject: [PATCH 82/95] WL#3071 - Maria checkpoint: a function to store information about transactions into buffers, is added to the transaction manager, and called by the Checkpoint module. storage/maria/ma_checkpoint.c: "collecting info about transactions" moves to trnman.c storage/maria/trnman.c: a function to store information about the active transactions list and committed transactions list, into buffers, for use by the Checkpoint module. This function needs to know how many trns there are in the committed list, so we introduce a counter, trnman_committed_transactions. m_string.h is needed for LEX_STRING. storage/maria/trnman.h: A function to store information about the active transactions list and committed transactions list, into buffers, for use by the Checkpoint module. storage/maria/unittest/trnman-t.c: trnman.h needs LEX_STRING so m_string.h --- storage/maria/ma_checkpoint.c | 68 +++--------------- storage/maria/trnman.c | 112 ++++++++++++++++++++++++++++-- storage/maria/trnman.h | 1 + storage/maria/unittest/trnman-t.c | 1 + 4 files changed, 118 insertions(+), 64 deletions(-) diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index a1d094d7da1..4294acb5627 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -176,7 +176,8 @@ my_bool execute_checkpoint_indirect() /* checkpoint record data: */ LSN checkpoint_start_lsn; char checkpoint_start_lsn_char[8]; - LEX_STRING strings[5]={ {&checkpoint_start_lsn_str, 8}, {0,0}, {0,0}, {0,0}, {0,0} }; + LEX_STRING strings[6]= + {checkpoint_start_lsn_char, 8}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} }; char *ptr; LSN checkpoint_lsn; LSN candidate_max_rec_lsn_at_last_checkpoint; @@ -203,69 +204,16 @@ my_bool execute_checkpoint_indirect() goto err; /* STEP 3: fetch information about transactions */ - /* note: this piece will move into trnman.c */ - /* - Transactions are in the "active list" (protected by a mutex) and in a - lock-free hash of "committed" (insertion protected by the same mutex, - deletion lock-free). - */ - { - TRN *trn; - ulong stored_trn_size= 0; - /* First, the active transactions */ - pthread_mutex_lock(LOCK_trn_list); - string2.length= 8+(7+2+8+8+8)*trnman_active_transactions; - if (NULL == (string2.str= my_malloc(string2.length))) - goto err; - ptr= string2.str; - ptr+= 8; - for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) - { - /* we would latch trn.rwlock if it existed */ - if (0 == trn->short_trid) /* trn is not inited, skip */ - continue; - /* state is not needed for now (only when we have prepared trx) */ - /* int7store does not exist but mi_int7store does */ - int7store(ptr, trn->trid); - ptr+= 7; - int2store(ptr, trn->short_trid); - ptr+= 2; - int8store(ptr, trn->undo_lsn); /* is an LSN 7 or 8 bytes really? */ - ptr+= 8; - int8store(ptr, trn->undo_purge_lsn); - ptr+= 8; - int8store(ptr, read_non_atomic(&trn->first_undo_lsn)); - ptr+= 8; - /* possibly unlatch el.rwlock */ - stored_trn_size++; - } - pthread_mutex_unlock(LOCK_trn_list); - /* - Now the committed ones. - We need a function which scans the hash's list of elements in a - lock-free manner (a bit like lfind(), starting from bucket 0), and for - each node (committed transaction) stores the transaction's - information (trid, undo_purge_lsn, first_undo_lsn) into a buffer. - This big buffer is malloc'ed at the start, so the number of elements (or - an upper bound of it) found in the hash needs to be known in advance - (one solution is to keep LOCK_trn_list locked, ensuring that nodes are - only deleted). - */ - /* - TODO: if we see there exists no transaction (active and committed) we can - tell the lock-free structures to do some freeing (my_free()). - */ - int8store(string1.str, stored_trn_size); - string2.length= 8+(7+2+8+8+8)*stored_trn_size; - } + if (trnman_collect_transactions(&strings[2], &strings[3])) + goto err; /* STEP 4: fetch information about table files */ { /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ lock(global_share_list_mutex); - string3.length= 8+(8+8)*share_list->count; - if (NULL == (string3.str= my_malloc(string3.length))) + strings[4].length= 8+(8+8)*share_list->count; + if (NULL == (strings[4].str= my_malloc(strings[4].length))) goto err; ptr= string3.str; /* possibly latch each MARIA_SHARE, one by one, like this: */ @@ -327,8 +275,8 @@ err: end: - for (i= 1; i<5; i++) - my_free(strings[i], MYF(MY_ALLOW_ZERO_PTR)); + for (i= 1; i<6; i++) + my_free(strings[i].str, MYF(MY_ALLOW_ZERO_PTR)); /* this portion cannot be done as a hook in write_log_record() for the diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 4399f0e1208..f289f6fcc5b 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -18,10 +18,16 @@ #include #include #include +#include #include "trnman.h" -/* status variables */ -uint trnman_active_transactions, trnman_allocated_transactions; +/* + status variables: + how many trns in the active list currently, + in the committed list currently, allocated since startup. +*/ +uint trnman_active_transactions, trnman_committed_transactions, + trnman_allocated_transactions; /* list of active transactions in the trid order */ static TRN active_list_min, active_list_max; @@ -100,6 +106,7 @@ int trnman_init() committed_list_min.next= &committed_list_max; trnman_active_transactions= 0; + trnman_committed_transactions= 0; trnman_allocated_transactions= 0; pool= 0; @@ -129,6 +136,7 @@ void trnman_destroy() { DBUG_ASSERT(trid_to_committed_trn.count == 0); DBUG_ASSERT(trnman_active_transactions == 0); + DBUG_ASSERT(trnman_committed_transactions == 0); DBUG_ASSERT(active_list_max.prev == &active_list_min); DBUG_ASSERT(active_list_min.next == &active_list_max); DBUG_ASSERT(committed_list_max.prev == &committed_list_min); @@ -285,11 +293,14 @@ void trnman_end_trn(TRN *trn, my_bool commit) */ if (trn->prev == &active_list_min) { + uint free_me_count; TRN *t; - for (t= committed_list_min.next; + for (t= committed_list_min.next, free_me_count= 0; t->commit_trid < active_list_min.next->min_read_from; - t= t->next) /* no-op */; + t= t->next, free_me_count++) /* no-op */; + DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || + (t == committed_list_min.next && free_me_count == 0)); /* found transactions committed before the oldest active one */ if (t != committed_list_min.next) { @@ -297,6 +308,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) committed_list_min.next= t; t->prev->next= 0; t->prev= &committed_list_min; + trnman_committed_transactions-= free_me_count; } } @@ -312,6 +324,7 @@ void trnman_end_trn(TRN *trn, my_bool commit) trn->next= &committed_list_max; trn->prev= committed_list_max.prev; committed_list_max.prev= trn->prev->next= trn; + trnman_committed_transactions++; res= lf_hash_insert(&trid_to_committed_trn, pins, &trn); DBUG_ASSERT(res == 0); @@ -413,3 +426,94 @@ my_bool trnman_can_read_from(TRN *trn, TrID trid) return can; } + +/* + Allocates two buffers and stores in them some information about transactions + of the active list (into the first buffer) and of the committed list (into + the second buffer). + + SYNOPSIS + trnman_collect_transactions() + str_act (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + str_com (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + + + DESCRIPTION + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + The caller has the intention of doing checkpoints. + + RETURN + 0 on success + 1 on error +*/ +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com) +{ + my_bool error; + TRN *trn; + char *ptr; + DBUG_ENTER("trnman_collect_transactions"); + + DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); + + pthread_mutex_lock(&LOCK_trn_list); + str_act->length= 8+(6+2+7+7+7)*trnman_active_transactions; + str_com->length= 8+(6+7+7)*trnman_committed_transactions; + if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) || + (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME))))) + goto err; + /* First, the active transactions */ + ptr= str_act->str; + int8store(ptr, (ulonglong)trnman_active_transactions); + ptr+= 8; + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + /* + trns with a short trid of 0 are not initialized; Recovery will recognize + this and ignore them. + State is not needed for now (only when we supported prepared trns). + For LSNs, Sanja will soon push lsn7store. + */ + int6store(ptr, trn->trid); + ptr+= 6; + int2store(ptr, trn->short_id); + ptr+= 2; + /* needed for rollback */ + /* lsn7store(ptr, trn->undo_lsn); */ + ptr+= 7; + /* needed for purge */ + /* lsn7store(ptr, trn->undo_purge_lsn); */ + ptr+= 7; + /* needed for low-water mark calculation */ + /* lsn7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */ + ptr+= 7; + } + /* do the same for committed ones */ + ptr= str_com->str; + int8store(ptr, (ulonglong)trnman_committed_transactions); + ptr+= 8; + for (trn= committed_list_min.next; trn != &committed_list_max; + trn= trn->next) + { + int6store(ptr, trn->trid); + ptr+= 6; + /* mi_int7store(ptr, trn->undo_purge_lsn); */ + ptr+= 7; + /* mi_int7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */ + ptr+= 7; + } + /* + TODO: if we see there exists no transaction (active and committed) we can + tell the lock-free structures to do some freeing (my_free()). + */ + error= 0; + goto end; +err: + error= 1; +end: + pthread_mutex_unlock(&LOCK_trn_list); + DBUG_RETURN(error); +} diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h index 409e354d423..267e3cabd7a 100644 --- a/storage/maria/trnman.h +++ b/storage/maria/trnman.h @@ -51,6 +51,7 @@ void trnman_end_trn(TRN *trn, my_bool commit); #define trnman_abort_trn(T) trnman_end_trn(T, FALSE) void trnman_free_trn(TRN *trn); my_bool trnman_can_read_from(TRN *trn, TrID trid); +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com); #endif diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 7da8202b881..3c70d10c440 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "../trnman.h" pthread_mutex_t rt_mutex; From ba7a2a9ab788748b2dffa8526fd4aa1d9396a26e Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Dec 2006 22:30:00 +0100 Subject: [PATCH 83/95] Maria - fixes to the pagecache unit tests (which started to print "x threads didn't exit" today after my merge); the main() thread was calling my_init() and my_thread_global_init() which is redudant; it was not calling my_end() though it's needed. unittest/mysys/mf_pagecache_consist.c: program already calls my_init(), which calls my_thread_global_init(), so no need for my_thread_global_init() (double call caused the main() thread to wait for itself thus timing out and printing "x threads didn't exit". my_thread_global_end() is not enough, as we call my_init() we must call my_end() (which calls my_thread_global_end()). unittest/mysys/mf_pagecache_single.c: program already calls my_init(), which calls my_thread_global_init(), so no need for my_thread_global_init() (double call caused the main() thread to wait for itself thus timing out and printing "x threads didn't exit". my_thread_global_end() is not enough, as we call my_init() we must call my_end() (which calls my_thread_global_end()). --- unittest/mysys/mf_pagecache_consist.c | 5 +---- unittest/mysys/mf_pagecache_single.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/unittest/mysys/mf_pagecache_consist.c b/unittest/mysys/mf_pagecache_consist.c index 95c2135edad..d31c19cc746 100755 --- a/unittest/mysys/mf_pagecache_consist.c +++ b/unittest/mysys/mf_pagecache_consist.c @@ -336,9 +336,6 @@ int main(int argc, char **argv __attribute__((unused))) VOID(thr_setconcurrency(2)); #endif - my_thread_global_init(); - - if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, PAGE_SIZE, 0)) == 0) { @@ -427,7 +424,7 @@ int main(int argc, char **argv __attribute__((unused))) exit(1); } /*my_delete(file1_name, MYF(0));*/ - my_thread_global_end(); + my_end(0); DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); diff --git a/unittest/mysys/mf_pagecache_single.c b/unittest/mysys/mf_pagecache_single.c index 9c467343156..4fadf2a554f 100644 --- a/unittest/mysys/mf_pagecache_single.c +++ b/unittest/mysys/mf_pagecache_single.c @@ -515,8 +515,6 @@ int main(int argc, char **argv __attribute__((unused))) VOID(thr_setconcurrency(2)); #endif - my_thread_global_init(); - plan(12); if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, @@ -569,7 +567,7 @@ int main(int argc, char **argv __attribute__((unused))) exit(1); } /*my_delete(file1_name, MYF(0));*/ - my_thread_global_end(); + my_end(0); DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); From b635df555adec7ebdc4cd40e102d51006c802639 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 3 Jan 2007 12:41:51 +0100 Subject: [PATCH 84/95] very minor comments and merges from MyISAM into Maria. storage/maria/ma_checkpoint.c: comments storage/maria/ma_close.c: comments storage/maria/ma_write.c: merge from myisam storage/maria/maria_def.h: typo storage/myisam/mi_delete.c: unneeded {}, making it identical to Maria --- storage/maria/ma_checkpoint.c | 44 ++++++++++++++++++++++++++++------- storage/maria/ma_close.c | 8 ++++--- storage/maria/ma_write.c | 18 +++++++------- storage/maria/maria_def.h | 2 +- storage/myisam/mi_delete.c | 2 -- 5 files changed, 51 insertions(+), 23 deletions(-) diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 4294acb5627..02d887f758a 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -216,14 +216,27 @@ my_bool execute_checkpoint_indirect() if (NULL == (strings[4].str= my_malloc(strings[4].length))) goto err; ptr= string3.str; - /* possibly latch each MARIA_SHARE, one by one, like this: */ - pthread_mutex_lock(&share->intern_lock); /* - We'll copy the file id (a bit like share->kfile), the file name - (like share->unique_file_name[_length]). + Note that maria_open_list is a list of MARIA_HA*, while we would prefer + a list of MARIA_SHARE* here (we are interested in the short id, + unique file name, members of MARIA_SHARE*, and in file descriptors, + which will in the end be in MARIA_SHARE*). */ - make_copy_of_global_share_list_to_array; - pthread_mutex_unlock(&share->intern_lock); + for (iterate on the maria_open_list) + { + /* latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); + /* + TODO: + we need to prevent the share from going away while we later flush and + force it without holding THR_LOCK_maria. For example if the share is + free()d by maria_close() we'll have a problem. Or if the share's file + descriptor is closed by maria_close() we will not be able to my_sync() + it. + */ + pthread_mutex_unlock(&share->intern_lock); + store the share pointer into a private array; + } unlock(global_share_list_mutex); /* work on copy */ @@ -231,15 +244,15 @@ my_bool execute_checkpoint_indirect() ptr+= 8; for (el in array) { - int8store(ptr, array[...].file_id); + int8store(ptr, array[...].short_id); ptr+= 8; - memcpy(ptr, array[...].file_name, ...); + memcpy(ptr, array[...].unique_file_name[_length], ...); ptr+= ...; + /* maybe we need to lock share->intern_lock here */ /* these two are long ops (involving disk I/O) that's why we copied the list, to not keep the list locked for long: */ - /* TODO: what if the table pointer is gone/reused now? */ flush_bitmap_pages(el); /* TODO: and also autoinc counter, logical file end, free page list */ @@ -267,6 +280,19 @@ my_bool execute_checkpoint_indirect() if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) goto err; + /* + Note that we should not alter memory structures until we have successfully + written the checkpoint record and control file. + Btw, a log write failure is serious: + - if we know how many bytes we managed to write, we should try to write + more, keeping the log's mutex (MY_FULL_IO) + - if we don't know, this log record is corrupted and we have no way to + "de-corrupt" it, so it will stay corrupted, and as the log is sequential, + any log record written after it will not be reachable (for example if we + would write UNDOs and crash, we would not be able to read the log and so + not be able to rollback), so we should stop the engine now (holding the + log's mutex) and do a recovery. + */ goto end; err: diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 5d7b24cc314..e1a4bb08301 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -60,9 +60,11 @@ int maria_close(register MARIA_HA *info) flag= !--share->reopen; /* RECOVERYTODO: - Below we are going to make the table unknown to future checkpoints, so it - needs to have fsync'ed itself entirely (bitmap, pages, etc) at this - point. + If "flag" is TRUE, in the line below we are going to make the table + unknown to future checkpoints, so it needs to have fsync'ed itself + entirely (bitmap, pages, etc) at this point. + The flushing is currently done a few lines further (which is ok, as we + still hold THR_LOCK_maria), but syncing is missing. */ maria_open_list=list_delete(maria_open_list,&info->open_list); pthread_mutex_unlock(&share->intern_lock); diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index c56a26fefff..55c0bf27259 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -345,7 +345,7 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, my_bool was_last_key; my_off_t next_page, dupp_key_pos; DBUG_ENTER("w_search"); - DBUG_PRINT("enter",("page: %ld",page)); + DBUG_PRINT("enter",("page: %ld", (long) page)); search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ @@ -468,7 +468,7 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *endpos, *prev_key; MARIA_KEY_PARAM s_temp; DBUG_ENTER("_ma_insert"); - DBUG_PRINT("enter",("key_pos: %lx",key_pos)); + DBUG_PRINT("enter",("key_pos: %lx", (long) key_pos)); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key, USE_WHOLE_KEY);); @@ -490,8 +490,8 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, { DBUG_PRINT("test",("t_length: %d ref_len: %d", t_length,s_temp.ref_length)); - DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: %lx", - s_temp.n_ref_length,s_temp.n_length,s_temp.key)); + DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx", + s_temp.n_ref_length,s_temp.n_length, (long) s_temp.key)); } #endif if (t_length > 0) @@ -684,7 +684,8 @@ uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, uchar *page, } while (page < end); *return_key_length=length; *after_key=page; - DBUG_PRINT("exit",("returns: %lx page: %lx half: %lx",lastpos,page,end)); + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx", + (long) lastpos, (long) page, (long) end)); DBUG_RETURN(lastpos); } /* _ma_find_half_pos */ @@ -739,7 +740,8 @@ static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, } *return_key_length=last_length; *after_key=lastpos; - DBUG_PRINT("exit",("returns: %lx page: %lx end: %lx",prevpos,page,end)); + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx", + (long) prevpos,(long) page,(long) end)); DBUG_RETURN(prevpos); } /* _ma_find_last_pos */ @@ -775,7 +777,7 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, next_page= _ma_kpos(info->s->base.key_reflength, father_key_pos+father_keylength); buff=info->buff; - DBUG_PRINT("test",("use right page: %lu",next_page)); + DBUG_PRINT("test",("use right page: %lu", (ulong) next_page)); } else { @@ -784,7 +786,7 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos); /* Fix that curr_buff is to left */ buff=curr_buff; curr_buff=info->buff; - DBUG_PRINT("test",("use left page: %lu",next_page)); + DBUG_PRINT("test",("use left page: %lu", (ulong) next_page)); } /* father_key_pos ptr to parting key */ if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0)) diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 5fb25512d4c..9585ecad292 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -662,7 +662,7 @@ typedef struct st_maria_block_info extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); -extern uint _ma_pack_get_block_info(MARIA_HA *mari, MARIA_BIT_BUFF *bit_buff, +extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, MARIA_BLOCK_INFO *info, byte **rec_buff_p, File file, my_off_t filepos); extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c index 7972ef614a0..1a683e2ac30 100644 --- a/storage/myisam/mi_delete.c +++ b/storage/myisam/mi_delete.c @@ -366,9 +366,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { /* This happens only with packed keys */ DBUG_PRINT("test",("Enlarging of key when deleting")); if (!_mi_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) - { goto err; - } ret_value=_mi_insert(info,keyinfo,key,anc_buff,keypos,lastkey, (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0); } From 345959c660d7401c9dc991a2c572ba145d6e199c Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 18 Jan 2007 21:38:14 +0200 Subject: [PATCH 85/95] Implementation of rows-in-block - Fixes some things missed in myisam->maria port - Moved variables that holds the state for the current row to 'cur_row' - Changed most uchar * to byte * to be able to remove a lot of casts - Removed RAID support - Added CHECK for rows-in-block - Added allocate_dynamic() for easier usage of dynamic rows when we know how many entries we will need - Reorder columns after CREATE for more optimal row storage (for rows-in-block) - Removed flag 'RRND_PRESERVER_LASTINX' (not needed) - Extended ma_test_all.sh to test more completely all row formats - New structs and variables to hold rows-in-block and bitmap information - Added org_data_file_type in header to allow easy restore of old record format when doing maria_pack / maria_chk -u - More virtual functions to handle different row types - Pointer to row is now MARIA_RECORD_POS instead of my_off_t - New header signature for MARIA index files - Fixed bugs in ma_test1.c and ma_test2.c - All key and row blocks are now of same size - We now only have one link chain for deleted key blocks include/m_string.h: Define bzero_if_purify include/maria.h: Implementation of rows-in-block include/my_base.h: Implementation of rows-in-block include/my_handler.h: Cleanup macros Added size_to_store_key_length() include/my_sys.h: Added 'allocate_dynamic()' include/myisamchk.h: Implementation of rows-in-block mysys/array.c: Added allocate_dynamic() mysys/mf_keycache.c: Moved DBUG_ENTER to it's right position mysys/my_pread.c: Ensure my_errno is always set sql/filesort.cc: Fixed some compiler warnings sql/gen_lex_hash.cc: Removed not needed 'inline' sql/ha_maria.cc: Implementation of rows-in-block Fixed compiler warnings sql/mysqld.cc: Fixed setting of wrong variable sql/uniques.cc: Fixed compiler warnings storage/maria/Makefile.am: Implementation of rows-in-block storage/maria/ma_check.c: Removed RAID functions Added support for CHECK of rows-in-blocks rows storage/maria/ma_checksum.c: Implementation of rows-in-block storage/maria/ma_close.c: Implementation of rows-in-block storage/maria/ma_create.c: Implementation of rows-in-block: - Reorder columns - All key blocks are now of same size - Removed old RAID support storage/maria/ma_dbug.c: Implementation of rows-in-block storage/maria/ma_delete.c: Implementation of rows-in-block storage/maria/ma_delete_all.c: Implementation of rows-in-block storage/maria/ma_dynrec.c: info->rec_buff is now allocated through _ma_alloc_buffer() Use new info->cur_row structure storage/maria/ma_extra.c: Implementation of rows-in-block storage/maria/ma_ft_boolean_search.c: Removed compiler warnings Indentation fixes storage/maria/ma_ft_nlq_search.c: Removed compiler warnings Indentation fixes storage/maria/ma_ft_update.c: Removed some casts storage/maria/ma_fulltext.h: Changed pointer type storage/maria/ma_info.c: Implementation of rows-in-block More general _ma_report_error() storage/maria/ma_init.c: Implementation of rows-in-block storage/maria/ma_key.c: Implementation of rows-in-block Removed some casts storage/maria/ma_keycache.c: Fixed DBUG entry storage/maria/ma_locking.c: Implementation of rows-in-block storage/maria/ma_open.c: Implementation of rows-in-block storage/maria/ma_packrec.c: Indentation fixes Changed uchar * to byte * to make it possible to remove some casts storage/maria/ma_page.c: Implementation of rows-in-block storage/maria/ma_range.c: Implementation of rows-in-block storage/maria/ma_rfirst.c: Implementation of rows-in-block storage/maria/ma_rkey.c: Implementation of rows-in-block Indentation fixes storage/maria/ma_rlast.c: Implementation of rows-in-block storage/maria/ma_rnext.c: Implementation of rows-in-block storage/maria/ma_rnext_same.c: Implementation of rows-in-block storage/maria/ma_rprev.c: Implementation of rows-in-block storage/maria/ma_rrnd.c: Implementation of rows-in-block Removed flag 'RRND_PRESERVER_LASTINX', by not resetting lastinx (This is reset by maria_scan_init()) storage/maria/ma_rsame.c: Implementation of rows-in-block storage/maria/ma_rsamepos.c: Implementation of rows-in-block storage/maria/ma_rt_index.c: Implementation of rows-in-block storage/maria/ma_rt_index.h: Implementation of rows-in-block storage/maria/ma_rt_key.c: Implementation of rows-in-block storage/maria/ma_rt_key.h: Implementation of rows-in-block storage/maria/ma_rt_mbr.c: Implementation of rows-in-block storage/maria/ma_rt_mbr.h: Implementation of rows-in-block storage/maria/ma_rt_split.c: Implementation of rows-in-block storage/maria/ma_rt_test.c: Indentation fix storage/maria/ma_scan.c: Implementation of rows-in-block Added 'maria_scan_end()' storage/maria/ma_search.c: Implementation of rows-in-block storage/maria/ma_sort.c: Indentation fixes uchar -> byte to be able to remove some casts storage/maria/ma_sp_defs.h: uchar * -> byte * storage/maria/ma_sp_key.c: uchar * -> byte * storage/maria/ma_sp_test.c: Indentation fixes storage/maria/ma_static.c: New header signature for MARIA storage/maria/ma_statrec.c: int -> my_bool functions my_off_t -> MARIA_RECORD_POS Fixed argument order for _ma_read_static_record() storage/maria/ma_test1.c: Implementation of rows-in-block Fixed some bugs in VARCHAR and BLOB testing storage/maria/ma_test2.c: Implementation of rows-in-block Fixed bug in BLOB testing storage/maria/ma_test3.c: Implementation of rows-in-block storage/maria/ma_test_all.sh: Run all tests with dynamic, static and block row formats (For the moment we skip REPAIR test of rows-in-block as this is not yet implemented) storage/maria/ma_unique.c: Implementation of rows-in-block storage/maria/ma_update.c: Implementation of rows-in-block storage/maria/ma_write.c: Implementation of rows-in-block Write of row is split into two parts, as rows-in-block format require us to do write of row before keys (to get row position) in contrast to all other row formats storage/maria/maria_chk.c: Implementation of rows-in-block storage/maria/maria_def.h: Implementation of rows-in-block - New structs and variables to hold rows-in-block and bitmap information - Added org_data_file_type in header to allow easy restore of old record format when doing maria_pack / maria_chk -u - More virtual functions to handle different row types - Pointer to row is now MARIA_RECORD_POS instead of my_off_t - uchar -> byte for many parameters to avoid casts storage/maria/maria_ftdump.c: Implementation of rows-in-block storage/maria/maria_pack.c: Implementation of rows-in-block storage/myisam/mi_check.c: Added new row types into switch to avoid compiler warnings Added some casts to avoid warnings after changing type of lastkey and buff storage/myisam/mi_create.c: Fix that 'pack_fields' is calculated correctly storage/myisam/mi_rsamepos.c: Implementation of rows-in-block storage/myisam/mi_test2.c: Fixed wrong printf storage/myisam/sort.c: uchar * -> byte * support-files/magic: Added support for Maria files Fided wrong entry's for MyISAM files storage/maria/ma_bitmap.c: New BitKeeper file ``storage/maria/ma_bitmap.c'' storage/maria/ma_blockrec.c: New BitKeeper file ``storage/maria/ma_blockrec.c'' storage/maria/ma_blockrec.h: New BitKeeper file ``storage/maria/ma_blockrec.h'' --- include/m_string.h | 7 +- include/maria.h | 96 +- include/my_base.h | 4 +- include/my_handler.h | 22 +- include/my_sys.h | 1 + include/myisamchk.h | 20 +- mysys/array.c | 56 +- mysys/mf_keycache.c | 17 +- mysys/my_pread.c | 6 +- sql/filesort.cc | 12 +- sql/gen_lex_hash.cc | 4 +- sql/ha_maria.cc | 69 +- sql/mysqld.cc | 1 + sql/uniques.cc | 2 +- storage/maria/Makefile.am | 3 +- storage/maria/ma_bitmap.c | 1704 ++++++++++++++++ storage/maria/ma_blockrec.c | 2742 ++++++++++++++++++++++++++ storage/maria/ma_blockrec.h | 160 ++ storage/maria/ma_check.c | 1746 ++++++++++------ storage/maria/ma_checksum.c | 25 +- storage/maria/ma_close.c | 29 +- storage/maria/ma_create.c | 452 +++-- storage/maria/ma_dbug.c | 6 +- storage/maria/ma_delete.c | 204 +- storage/maria/ma_delete_all.c | 3 +- storage/maria/ma_dynrec.c | 261 +-- storage/maria/ma_extra.c | 34 +- storage/maria/ma_ft_boolean_search.c | 34 +- storage/maria/ma_ft_nlq_search.c | 35 +- storage/maria/ma_ft_update.c | 23 +- storage/maria/ma_fulltext.h | 2 +- storage/maria/ma_info.c | 22 +- storage/maria/ma_init.c | 2 + storage/maria/ma_key.c | 68 +- storage/maria/ma_keycache.c | 5 +- storage/maria/ma_locking.c | 53 +- storage/maria/ma_open.c | 647 +++--- storage/maria/ma_packrec.c | 273 +-- storage/maria/ma_page.c | 35 +- storage/maria/ma_range.c | 37 +- storage/maria/ma_rfirst.c | 2 +- storage/maria/ma_rkey.c | 74 +- storage/maria/ma_rlast.c | 2 +- storage/maria/ma_rnext.c | 9 +- storage/maria/ma_rnext_same.c | 33 +- storage/maria/ma_rprev.c | 9 +- storage/maria/ma_rrnd.c | 34 +- storage/maria/ma_rsame.c | 26 +- storage/maria/ma_rsamepos.c | 14 +- storage/maria/ma_rt_index.c | 266 +-- storage/maria/ma_rt_index.h | 17 +- storage/maria/ma_rt_key.c | 12 +- storage/maria/ma_rt_key.h | 12 +- storage/maria/ma_rt_mbr.c | 34 +- storage/maria/ma_rt_mbr.h | 29 +- storage/maria/ma_rt_split.c | 15 +- storage/maria/ma_rt_test.c | 9 +- storage/maria/ma_scan.c | 33 +- storage/maria/ma_search.c | 351 ++-- storage/maria/ma_sort.c | 132 +- storage/maria/ma_sp_defs.h | 2 +- storage/maria/ma_sp_key.c | 2 +- storage/maria/ma_sp_test.c | 9 +- storage/maria/ma_static.c | 12 +- storage/maria/ma_statrec.c | 74 +- storage/maria/ma_test1.c | 162 +- storage/maria/ma_test2.c | 122 +- storage/maria/ma_test3.c | 4 +- storage/maria/ma_test_all.sh | 289 +-- storage/maria/ma_unique.c | 30 +- storage/maria/ma_update.c | 37 +- storage/maria/ma_write.c | 216 +- storage/maria/maria_chk.c | 195 +- storage/maria/maria_def.h | 463 +++-- storage/maria/maria_ftdump.c | 6 +- storage/maria/maria_pack.c | 100 +- storage/myisam/mi_check.c | 63 +- storage/myisam/mi_create.c | 10 +- storage/myisam/mi_rsamepos.c | 3 +- storage/myisam/mi_test2.c | 2 +- storage/myisam/sort.c | 10 +- support-files/magic | 8 +- 82 files changed, 8863 insertions(+), 2991 deletions(-) create mode 100644 storage/maria/ma_bitmap.c create mode 100644 storage/maria/ma_blockrec.c create mode 100644 storage/maria/ma_blockrec.h diff --git a/include/m_string.h b/include/m_string.h index e009447c192..6ba1f5f8914 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -68,7 +68,7 @@ # define bcopy(s, d, n) memcpy((d), (s), (n)) # define bcmp(A,B,C) memcmp((A),(B),(C)) # define bzero(A,B) memset((A),0,(B)) -# define bmove_align(A,B,C) memcpy((A),(B),(C)) +# define bmove_align(A,B,C) memcpy((A),(B),(C)) #endif #if defined(__cplusplus) @@ -127,7 +127,10 @@ extern int bcmp(const char *s1,const char *s2,uint len); extern int my_bcmp(const char *s1,const char *s2,uint len); #undef bcmp #define bcmp(A,B,C) my_bcmp((A),(B),(C)) -#endif +#define bzero_if_purify(A,B) bzero(A,B) +#else +#define bzero_if_purify(A,B) +#endif /* HAVE_purify */ #ifndef bmove512 extern void bmove512(gptr dst,const gptr src,uint len); diff --git a/include/maria.h b/include/maria.h index 705ba7c91c7..b917efe1f6c 100644 --- a/include/maria.h +++ b/include/maria.h @@ -55,6 +55,7 @@ extern "C" { #define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ #define MARIA_MAX_KEY_BLOCK_LENGTH 32768 #define maria_portable_sizeof_char_ptr 8 +#define MARIA_MAX_KEY_LENGTH 1000 /* Max length in bytes */ /* In the following macros '_keyno_' is 0 .. keys-1. @@ -109,13 +110,15 @@ extern "C" { /* Param to/from maria_info */ +typedef ulonglong MARIA_RECORD_POS; + typedef struct st_maria_isaminfo /* Struct from h_info */ { ha_rows records; /* Records in database */ ha_rows deleted; /* Deleted records in database */ - my_off_t recpos; /* Pos for last used record */ - my_off_t newrecpos; /* Pos if we write new record */ - my_off_t dupp_key_pos; /* Position to record with dup key */ + MARIA_RECORD_POS recpos; /* Pos for last used record */ + MARIA_RECORD_POS newrecpos; /* Pos if we write new record */ + MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */ my_off_t data_file_length; /* Length of data file */ my_off_t max_data_file_length, index_file_length; my_off_t max_index_file_length, delete_length; @@ -146,9 +149,11 @@ typedef struct st_maria_create_info ulonglong auto_increment; ulonglong data_file_length; ulonglong key_file_length; + uint null_bytes; uint old_options; + enum data_file_type org_data_file_type; uint8 language; - my_bool with_auto_increment; + my_bool with_auto_increment, transactional; } MARIA_CREATE_INFO; struct st_maria_info; /* For referense */ @@ -168,25 +173,24 @@ typedef struct st_maria_keydef /* Key definition with open & info */ uint16 keylength; /* Tot length of keyparts (auto) */ uint16 minlength; /* min length of (packed) key (auto) */ uint16 maxlength; /* max length of (packed) key (auto) */ - uint16 block_size_index; /* block_size (auto) */ uint32 version; /* For concurrent read/write */ uint32 ftparser_nr; /* distinct ftparser number */ HA_KEYSEG *seg, *end; struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */ - int(*bin_search) (struct st_maria_info *info, - struct st_maria_keydef *keyinfo, uchar *page, uchar *key, - uint key_len, uint comp_flag, uchar **ret_pos, - uchar *buff, my_bool *was_last_key); - uint(*get_key) (struct st_maria_keydef *keyinfo, uint nod_flag, - uchar **page, uchar *key); - int(*pack_key) (struct st_maria_keydef *keyinfo, uint nod_flag, - uchar *next_key, uchar *org_key, uchar *prev_key, - uchar *key, struct st_maria_s_param *s_temp); - void(*store_key) (struct st_maria_keydef *keyinfo, uchar *key_pos, + int (*bin_search)(struct st_maria_info *info, + struct st_maria_keydef *keyinfo, byte *page, byte *key, + uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *was_last_key); + uint(*get_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte **page, byte *key); + int (*pack_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte *next_key, byte *org_key, byte *prev_key, + const byte *key, struct st_maria_s_param *s_temp); + void (*store_key)(struct st_maria_keydef *keyinfo, byte *key_pos, struct st_maria_s_param *s_temp); - int(*ck_insert) (struct st_maria_info *inf, uint k_nr, uchar *k, uint klen); - int(*ck_delete) (struct st_maria_info *inf, uint k_nr, uchar *k, uint klen); + int (*ck_insert)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); + int (*ck_delete)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); } MARIA_KEYDEF; @@ -195,7 +199,7 @@ typedef struct st_maria_keydef /* Key definition with open & info */ typedef struct st_maria_unique_def /* Segment definition of unique */ { uint16 keysegs; /* Number of key-segment */ - uchar key; /* Mapped to which key */ + uint8 key; /* Mapped to which key */ uint8 null_are_equal; HA_KEYSEG *seg, *end; } MARIA_UNIQUEDEF; @@ -218,16 +222,19 @@ struct st_maria_bit_buff; typedef struct st_maria_columndef /* column information */ { - int16 type; /* en_fieldtype */ + uint64 offset; /* Offset to position in row */ + enum en_fieldtype type; uint16 length; /* length of field */ - uint32 offset; /* Offset to position in row */ - uint8 null_bit; /* If column may be 0 */ - uint16 null_pos; /* position for null marker */ + uint16 fill_length; + uint16 null_pos; /* Position for null marker */ + uint16 empty_pos; /* Position for empty marker */ + uint8 null_bit; /* If column may be NULL */ + uint8 empty_bit; /* If column may be empty */ #ifndef NOT_PACKED_DATABASES - void(*unpack) (struct st_maria_columndef *rec, - struct st_maria_bit_buff *buff, - uchar *start, uchar *end); + void(*unpack)(struct st_maria_columndef *rec, + struct st_maria_bit_buff *buff, + byte *start, byte *end); enum en_fieldtype base_type; uint space_length_bits, pack_type; MARIA_DECODE_TREE *huff_tree; @@ -237,7 +244,8 @@ typedef struct st_maria_columndef /* column information */ extern ulong maria_block_size; extern ulong maria_concurrent_insert; -extern my_bool maria_flush, maria_delay_key_write, maria_single_user; +extern my_bool maria_flush, maria_single_user; +extern my_bool maria_delay_key_write, maria_delay_rec_write; extern my_off_t maria_max_temp_length; extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size; extern KEY_CACHE maria_key_cache_var, *maria_key_cache; @@ -260,19 +268,22 @@ extern int maria_rlast(struct st_maria_info *file, byte *buf, int inx); extern int maria_rnext(struct st_maria_info *file, byte *buf, int inx); extern int maria_rnext_same(struct st_maria_info *info, byte *buf); extern int maria_rprev(struct st_maria_info *file, byte *buf, int inx); -extern int maria_rrnd(struct st_maria_info *file, byte *buf, my_off_t pos); +extern int maria_rrnd(struct st_maria_info *file, byte *buf, + MARIA_RECORD_POS pos); extern int maria_scan_init(struct st_maria_info *file); extern int maria_scan(struct st_maria_info *file, byte *buf); +extern void maria_scan_end(struct st_maria_info *file); extern int maria_rsame(struct st_maria_info *file, byte *record, int inx); extern int maria_rsame_with_pos(struct st_maria_info *file, byte *record, - int inx, my_off_t pos); + int inx, MARIA_RECORD_POS pos); extern int maria_update(struct st_maria_info *file, const byte *old, byte *new_record); extern int maria_write(struct st_maria_info *file, byte *buff); -extern my_off_t maria_position(struct st_maria_info *file); +extern MARIA_RECORD_POS maria_position(struct st_maria_info *file); extern int maria_status(struct st_maria_info *info, MARIA_INFO *x, uint flag); extern int maria_lock_database(struct st_maria_info *file, int lock_type); -extern int maria_create(const char *name, uint keys, MARIA_KEYDEF *keydef, +extern int maria_create(const char *name, enum data_file_type record_type, + uint keys, MARIA_KEYDEF *keydef, uint columns, MARIA_COLUMNDEF *columndef, uint uniques, MARIA_UNIQUEDEF *uniquedef, MARIA_CREATE_INFO *create_info, uint flags); @@ -324,7 +335,7 @@ typedef struct st_maria_sort_param MARIA_KEYDEF *keyinfo; MARIA_SORT_INFO *sort_info; HA_KEYSEG *seg; - uchar **sort_keys; + byte **sort_keys; byte *rec_buff; void *wordlist, *wordptr; MEM_ROOT wordroot; @@ -338,16 +349,17 @@ typedef struct st_maria_sort_param ulonglong unique[HA_MAX_KEY_SEG+1]; ulonglong notnull[HA_MAX_KEY_SEG+1]; - my_off_t pos,max_pos,filepos,start_recpos; + MARIA_RECORD_POS pos,max_pos,filepos,start_recpos; uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; my_bool fix_datafile, master; + my_size_t rec_buff_size; int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *); - int (*key_read)(struct st_maria_sort_param *,void *); - int (*key_write)(struct st_maria_sort_param *, const void *); + int (*key_read)(struct st_maria_sort_param *, byte *); + int (*key_write)(struct st_maria_sort_param *, const byte *); void (*lock_in_memory)(HA_CHECK *); - NEAR int (*write_keys)(struct st_maria_sort_param *, register uchar **, + NEAR int (*write_keys)(struct st_maria_sort_param *, register byte **, uint , struct st_buffpek *, IO_CACHE *); NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); NEAR int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,char *, @@ -356,7 +368,7 @@ typedef struct st_maria_sort_param /* functions in maria_check */ -void mariachk_init(HA_CHECK *param); +void maria_chk_init(HA_CHECK *param); int maria_chk_status(HA_CHECK *param, MARIA_HA *info); int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag); int maria_chk_size(HA_CHECK *param, MARIA_HA *info); @@ -371,19 +383,19 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, const char *name, int rep_quick); int maria_change_to_newfile(const char *filename, const char *old_ext, - const char *new_ext, uint raid_chunks, myf myflags); + const char *new_ext, myf myflags); void maria_lock_memory(HA_CHECK *param); int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update); void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, - ulonglong *unique, ulonglong *notnull, - ulonglong records); + ulonglong *unique, ulonglong *notnull, + ulonglong records); int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start, - my_off_t length, const char *type); + my_off_t length, const char *type); int maria_movepoint(MARIA_HA *info, byte *record, my_off_t oldpos, - my_off_t newpos, uint prot_key); + my_off_t newpos, uint prot_key); int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile); int maria_test_if_almost_full(MARIA_HA *info); -int maria_recreate_table(HA_CHECK *param, MARIA_HA ** org_info, char *filename); +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename); int maria_disable_indexes(MARIA_HA *info); int maria_enable_indexes(MARIA_HA *info); int maria_indexes_are_disabled(MARIA_HA *info); diff --git a/include/my_base.h b/include/my_base.h index 5ad7ac57fe1..76a67e3f3e7 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -48,7 +48,7 @@ #define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */ #define HA_OPEN_FROM_SQL_LAYER 64 - /* The following is parameter to ha_rkey() how to use key */ +/* The following is parameter to ha_rkey() how to use key */ /* We define a complete-field prefix of a key value as a prefix where @@ -433,7 +433,7 @@ enum en_fieldtype { }; enum data_file_type { - STATIC_RECORD,DYNAMIC_RECORD,COMPRESSED_RECORD + STATIC_RECORD, DYNAMIC_RECORD, COMPRESSED_RECORD, BLOCK_RECORD }; /* For key ranges */ diff --git a/include/my_handler.h b/include/my_handler.h index 9b086036389..fd1bafe8d5c 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -58,33 +58,35 @@ typedef struct st_HA_KEYSEG /* Key-portion */ } HA_KEYSEG; #define get_key_length(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= (uint) (uchar) *((key)++); \ +{ if (*(uchar*) (key) != 255) \ + length= (uint) *(uchar*) ((key)++); \ else \ - { length=mi_uint2korr((key)+1); (key)+=3; } \ + { length= mi_uint2korr((key)+1); (key)+=3; } \ } #define get_key_length_rdonly(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= ((uint) (uchar) *((key))); \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key))); \ else \ - { length=mi_uint2korr((key)+1); } \ + { length= mi_uint2korr((key)+1); } \ } #define get_key_pack_length(length,length_pack,key) \ -{ if ((uchar) *(key) != 255) \ - { length= (uint) (uchar) *((key)++); length_pack=1; }\ +{ if (*(uchar*) (key) != 255) \ + { length= (uint) *(uchar*) ((key)++); length_pack= 1; }\ else \ - { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \ + { length=mi_uint2korr((key)+1); (key)+= 3; length_pack= 3; } \ } #define store_key_length_inc(key,length) \ { if ((length) < 255) \ - { *(key)++=(length); } \ + { *(key)++= (length); } \ else \ { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ } +#define size_to_store_key_length(length) ((length) < 255 ? 1 : 3) + #define get_rec_bits(bit_ptr, bit_ofs, bit_len) \ (((((uint16) (bit_ptr)[1] << 8) | (uint16) (bit_ptr)[0]) >> (bit_ofs)) & \ ((1 << (bit_len)) - 1)) diff --git a/include/my_sys.h b/include/my_sys.h index 4b31f6bcd2b..3a491baf47f 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -760,6 +760,7 @@ extern my_bool insert_dynamic(DYNAMIC_ARRAY *array,gptr element); extern byte *alloc_dynamic(DYNAMIC_ARRAY *array); extern byte *pop_dynamic(DYNAMIC_ARRAY*); extern my_bool set_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index); +extern my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements); extern void get_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index); extern void delete_dynamic(DYNAMIC_ARRAY *array); extern void delete_dynamic_element(DYNAMIC_ARRAY *array, uint array_index); diff --git a/include/myisamchk.h b/include/myisamchk.h index 442fe5f7ece..66d0a77b62f 100644 --- a/include/myisamchk.h +++ b/include/myisamchk.h @@ -76,8 +76,8 @@ typedef struct st_sort_key_blocks /* Used when sorting */ { - uchar *buff, *end_pos; - uchar lastkey[HA_MAX_POSSIBLE_KEY_BUFF]; + byte *buff, *end_pos; + byte lastkey[HA_MAX_POSSIBLE_KEY_BUFF]; uint last_length; int inited; } SORT_KEY_BLOCKS; @@ -121,20 +121,24 @@ typedef struct st_handler_check_param my_off_t search_after_block; my_off_t new_file_pos, key_file_blocks; my_off_t keydata, totaldata, key_blocks, start_check_pos; - ha_rows total_records, total_deleted; + my_off_t used, empty, splits, del_length, link_used; + ha_rows total_records, total_deleted, records,del_blocks; + ha_rows full_page_count, tail_count; ha_checksum record_checksum, glob_crc; ha_checksum key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_record_checksum; ulong use_buffers, read_buffer_length, write_buffer_length; ulong sort_buffer_length, sort_key_blocks; ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY]; uint out_flag, warning_printed, error_printed, verbose; uint opt_sort_key, total_files, max_level; uint testflag, key_cache_block_size; - int tmpfile_createflag; + int tmpfile_createflag, err_count; myf myf_rw; uint8 language; my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort, calc_checksum; + my_bool retry_repair, force_sort, calc_checksum, static_row_size; char temp_filename[FN_REFLEN]; IO_CACHE read_cache; enum_handler_stats_method stats_method; @@ -143,15 +147,15 @@ typedef struct st_handler_check_param typedef struct st_sort_ftbuf { - uchar *buf, *end; + byte *buf, *end; int count; - uchar lastkey[HA_MAX_KEY_BUFF]; + byte lastkey[HA_MAX_KEY_BUFF]; } SORT_FT_BUF; typedef struct st_buffpek { my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* Key pointers */ + byte *base, *key; /* Key pointers */ ha_rows count; /* Number of rows in table */ ulong mem_count; /* numbers of keys in memory */ ulong max_keys; /* Max keys in buffert */ diff --git a/mysys/array.c b/mysys/array.c index a50d8b78178..2017bba5b61 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -61,7 +61,8 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size, array->max_element=init_alloc; array->alloc_increment=alloc_increment; array->size_of_element=element_size; - if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME)))) + if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc, + MYF(MY_WME)))) { array->max_element=0; DBUG_RETURN(TRUE); @@ -154,7 +155,7 @@ byte *pop_dynamic(DYNAMIC_ARRAY *array) } /* - Replace elemnent in array with given element and index + Replace element in array with given element and index SYNOPSIS set_dynamic() @@ -175,19 +176,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) { if (idx >= array->elements) { - if (idx >= array->max_element) - { - uint size; - char *new_ptr; - size=(idx+array->alloc_increment)/array->alloc_increment; - size*= array->alloc_increment; - if (!(new_ptr=(char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; - array->buffer=new_ptr; - array->max_element=size; - } + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; bzero((gptr) (array->buffer+array->elements*array->size_of_element), (idx - array->elements)*array->size_of_element); array->elements=idx+1; @@ -197,6 +187,42 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) return FALSE; } + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array + max_elements Numbers of elements that is needed + + NOTES + Any new allocated element are NOT initialized + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) +{ + if (max_elements >= array->max_element) + { + uint size; + char *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + return TRUE; + array->buffer= new_ptr; + array->max_element= size; + } + return FALSE; +} + + /* Get an element from array by given index diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index e6f4348968f..10a3e85eb4d 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1791,8 +1791,6 @@ byte *key_cache_read(KEY_CACHE *keycache, uint offset= 0; byte *start= buff; DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", - (uint) file, (ulong) filepos, length)); if (keycache->can_be_used) { @@ -1802,6 +1800,11 @@ byte *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; + DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); /* Read data in key_cache_block_size increments */ do @@ -2053,10 +2056,6 @@ int key_cache_write(KEY_CACHE *keycache, reg1 BLOCK_LINK *block; int error=0; DBUG_ENTER("key_cache_write"); - DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u", - (uint) file, (ulong) filepos, length, block_length, - keycache ? keycache->key_cache_block_size : 0)); if (!dont_write) { @@ -2078,6 +2077,12 @@ int key_cache_write(KEY_CACHE *keycache, int page_st; uint offset; + DBUG_PRINT("enter", + ("fd: %u pos: %lu page: %lu length: %u block_length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length, block_length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); do { diff --git a/mysys/my_pread.c b/mysys/my_pread.c index ac52895efe9..dd174c8a4cc 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -52,7 +52,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, if (!error) /* Seek was successful */ { if ((readbytes = (uint) read(Filedes, Buffer, Count)) == -1L) - my_errno= errno; + my_errno= errno ? errno : -1; /* We should seek back, even if read failed. If this fails, @@ -68,7 +68,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, #else if ((error= ((readbytes = (uint) pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno; + my_errno= errno ? errno : -1; #endif if (error || readbytes != Count) { @@ -84,8 +84,10 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); else if (MyFlags & (MY_NABP | MY_FNABP)) + { my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); + } } if ((int) readbytes == -1 || (MyFlags & (MY_FNABP | MY_NABP))) DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ diff --git a/sql/filesort.cc b/sql/filesort.cc index f41d72ac07a..3fb8517c624 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -1032,7 +1032,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length) { - uchar *reuse_end= reuse->base + reuse->max_keys * key_length; + byte *reuse_end= reuse->base + reuse->max_keys * key_length; for (uint i= 0; i < queue->elements; ++i) { BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i); @@ -1103,7 +1103,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, offset= rec_length-res_length; maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1)); to_start_filepos= my_b_tell(to_file); - strpos= (uchar*) sort_buffer; + strpos= sort_buffer; org_max_rows=max_rows= param->max_rows; /* The following will fire if there is not enough space in sort_buffer */ @@ -1115,10 +1115,10 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, DBUG_RETURN(1); /* purecov: inspected */ for (buffpek= Fb ; buffpek <= Tb ; buffpek++) { - buffpek->base= strpos; + buffpek->base= (byte*) strpos; buffpek->max_keys= maxcount; strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek, - rec_length)); + rec_length)); if (error == -1) goto err; /* purecov: inspected */ buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected @@ -1207,7 +1207,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, } } buffpek= (BUFFPEK*) queue_top(&queue); - buffpek->base= sort_buffer; + buffpek->base= (byte*) sort_buffer; buffpek->max_keys= param->keys; /* @@ -1242,7 +1242,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key+offset; + strpos= (uchar*) buffpek->key+offset; for (end= strpos+buffpek->mem_count*rec_length ; strpos != end ; strpos+= rec_length) diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc index 7e0b178f7af..4b319d0a87c 100644 --- a/sql/gen_lex_hash.cc +++ b/sql/gen_lex_hash.cc @@ -463,8 +463,8 @@ int main(int argc,char **argv) printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2); printf("\ -static inline SYMBOL *get_hash_symbol(const char *s,\n\ - unsigned int len,bool function)\n\ +static SYMBOL *get_hash_symbol(const char *s,\n\ + unsigned int len,bool function)\n\ {\n\ register uchar *hash_map;\n\ register const char *cur_str= s;\n\ diff --git a/sql/ha_maria.cc b/sql/ha_maria.cc index 29718f1493e..aa37cc96452 100644 --- a/sql/ha_maria.cc +++ b/sql/ha_maria.cc @@ -220,10 +220,10 @@ int ha_maria::dump(THD * thd, int fd) { MARIA_SHARE *share= file->s; NET *net= &thd->net; - uint blocksize= share->blocksize; + uint block_size= share->block_size; my_off_t bytes_to_read= share->state.state.data_file_length; int data_fd= file->dfile; - byte *buf= (byte *) my_malloc(blocksize, MYF(MY_WME)); + byte *buf= (byte *) my_malloc(block_size, MYF(MY_WME)); if (!buf) return ENOMEM; @@ -231,7 +231,7 @@ int ha_maria::dump(THD * thd, int fd) my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); for (; bytes_to_read > 0;) { - uint bytes= my_read(data_fd, buf, blocksize, MYF(MY_WME)); + uint bytes= my_read(data_fd, buf, block_size, MYF(MY_WME)); if (bytes == MY_FILE_ERROR) { error= errno; @@ -364,7 +364,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) const char *old_proc_info= thd->proc_info; thd->proc_info= "Checking table"; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "check"; param.db_name= table->s->db.str; @@ -454,7 +454,7 @@ int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) HA_CHECK param; MARIA_SHARE *share= file->s; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "analyze"; param.db_name= table->s->db.str; @@ -510,7 +510,7 @@ int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt) err: { HA_CHECK param; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "restore"; param.db_name= table->s->db.str; @@ -570,7 +570,7 @@ int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt) err: { HA_CHECK param; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "backup"; param.db_name= table->s->db.str; @@ -591,7 +591,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) if (!file) return HA_ADMIN_INTERNAL_ERROR; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "repair"; param.testflag= ((check_opt->flags & ~(T_EXTEND)) | @@ -638,7 +638,7 @@ int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) return HA_ADMIN_INTERNAL_ERROR; HA_CHECK param; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "optimize"; param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | @@ -833,7 +833,7 @@ err: { /* Send error to user */ HA_CHECK param; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "assign_to_keycache"; param.db_name= table->s->db.str; @@ -901,7 +901,7 @@ int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt) err: { HA_CHECK param; - mariachk_init(¶m); + maria_chk_init(¶m); param.thd= thd; param.op_name= "preload_keys"; param.db_name= table->s->db.str; @@ -1010,7 +1010,7 @@ int ha_maria::enable_indexes(uint mode) HA_CHECK param; const char *save_proc_info= thd->proc_info; thd->proc_info= "Creating index"; - mariachk_init(¶m); + maria_chk_init(¶m); param.op_name= "recreating_index"; param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK | T_CREATE_MISSING_KEYS); @@ -1395,7 +1395,7 @@ void ha_maria::info(uint flag) if (flag & HA_STATUS_ERRKEY) { errkey= info.errkey; - my_store_ptr(dup_ref, ref_length, info.dupp_key_pos); + my_store_ptr(dup_ref, ref_length, info.dup_key_pos); } /* Faster to always update, than to do it based on flag */ stats.update_time= info.update_time; @@ -1482,6 +1482,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg, HA_KEYSEG *keyseg; TABLE_SHARE *share= table_arg->s; uint options= share->db_options_in_use; + enum data_file_type row_type; DBUG_ENTER("ha_maria::create"); type= HA_KEYTYPE_BINARY; // Keep compiler happy @@ -1607,27 +1608,27 @@ int ha_maria::create(const char *name, register TABLE *table_arg, if (recpos != minpos) { // Reserved space (Null bits?) bzero((char*) recinfo_pos, sizeof(*recinfo_pos)); - recinfo_pos->type= (int) FIELD_NORMAL; + recinfo_pos->type= FIELD_NORMAL; recinfo_pos++->length= (uint16) (minpos - recpos); } if (!found) break; if (found->flags & BLOB_FLAG) - recinfo_pos->type= (int) FIELD_BLOB; + recinfo_pos->type= FIELD_BLOB; else if (found->type() == MYSQL_TYPE_VARCHAR) recinfo_pos->type= FIELD_VARCHAR; else if (!(options & HA_OPTION_PACK_RECORD)) - recinfo_pos->type= (int) FIELD_NORMAL; + recinfo_pos->type= FIELD_NORMAL; else if (found->zero_pack()) - recinfo_pos->type= (int) FIELD_SKIP_ZERO; + recinfo_pos->type= FIELD_SKIP_ZERO; else - recinfo_pos->type= (int) ((length <= 3 || - (found->flags & ZEROFILL_FLAG)) ? - FIELD_NORMAL : - found->type() == MYSQL_TYPE_STRING || - found->type() == MYSQL_TYPE_VAR_STRING ? - FIELD_SKIP_ENDSPACE : FIELD_SKIP_PRESPACE); + recinfo_pos->type= ((length <= 3 || + (found->flags & ZEROFILL_FLAG)) ? + FIELD_NORMAL : + found->type() == MYSQL_TYPE_STRING || + found->type() == MYSQL_TYPE_VAR_STRING ? + FIELD_SKIP_ENDSPACE : FIELD_SKIP_PRESPACE); if (found->null_ptr) { recinfo_pos->null_bit= found->null_bit; @@ -1666,12 +1667,26 @@ int ha_maria::create(const char *name, register TABLE *table_arg, if (options & HA_OPTION_DELAY_KEY_WRITE) create_flags |= HA_CREATE_DELAY_KEY_WRITE; + switch (info->row_type) { + case ROW_TYPE_FIXED: + row_type= STATIC_RECORD; + break; + case ROW_TYPE_DYNAMIC: + row_type= DYNAMIC_RECORD; + break; + default: + case ROW_TYPE_PAGES: + row_type= BLOCK_RECORD; + break; + } + /* TODO: Check that the following fn_format is really needed */ error= - maria_create(fn_format - (buff, name, "", "", MY_UNPACK_FILENAME | MY_APPEND_EXT), - share->keys, keydef, (uint) (recinfo_pos - recinfo), recinfo, - 0, (MARIA_UNIQUEDEF *) 0, &create_info, create_flags); + maria_create(fn_format(buff, name, "", "", + MY_UNPACK_FILENAME | MY_APPEND_EXT), + row_type, share->keys, keydef, (uint) (recinfo_pos - recinfo), + recinfo, 0, (MARIA_UNIQUEDEF *) 0, &create_info, + create_flags); my_free((gptr) recinfo, MYF(0)); DBUG_RETURN(error); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a666bec32d7..45adaa66a4c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -7133,6 +7133,7 @@ static void mysql_init_variables(void) when collecting index statistics for MyISAM tables. */ global_system_variables.myisam_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; + global_system_variables.maria_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; /* Variables that depends on compile options */ #ifndef DBUG_OFF diff --git a/sql/uniques.cc b/sql/uniques.cc index ad074f8b2b0..933283bf305 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -446,7 +446,7 @@ static bool merge_walk(uchar *merge_buffer, uint merge_buffer_size, */ for (top= begin; top != end; ++top) { - top->base= merge_buffer + (top - begin) * piece_size; + top->base= (byte*) (merge_buffer + (top - begin) * piece_size); top->max_keys= max_key_count_per_piece; bytes_read= read_to_buffer(file, top, key_length); if (bytes_read == (uint) (-1)) diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index d4315b4d446..b2936143c36 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -28,7 +28,7 @@ bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_chk_DEPENDENCIES= $(LIBRARIES) maria_pack_DEPENDENCIES=$(LIBRARIES) noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test -noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h +noinst_HEADERS = maria_def.h ma_blockrec.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h ma_ft_eval.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test2_DEPENDENCIES= $(LIBRARIES) ma_test3_DEPENDENCIES= $(LIBRARIES) @@ -42,6 +42,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_search.c ma_page.c ma_key.c ma_locking.c \ ma_rrnd.c ma_scan.c ma_cache.c \ ma_statrec.c ma_packrec.c ma_dynrec.c \ + ma_blockrec.c ma_bitmap.c \ ma_update.c ma_write.c ma_unique.c \ ma_delete.c \ ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \ diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c new file mode 100644 index 00000000000..5ed5a776658 --- /dev/null +++ b/storage/maria/ma_bitmap.c @@ -0,0 +1,1704 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Bitmap handling (for records in block) + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. + + The bitmap code assumes there is always an active bitmap page and thus + that there is at least one bitmap page in the file + + Structure of bitmap page: + + Fixed size records (to be implemented later): + + 2 bits are used to indicate: + + 0 Empty + 1 50-75 % full (at least room for 2 records) + 2 75-100 % full (at least room for one record) + 3 100 % full (no more room for records) + + Assuming 8K pages, this will allow us to map: + 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M + + (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN) + + Note that for fixed size rows, we can't add more columns without doing + a full reorganization of the table. The user can always force a dynamic + size row format by specifying ROW_FORMAT=dynamic. + + + Dynamic size records: + + 3 bits are used to indicate + + 0 Empty page + 1 0-30 % full (at least room for 3 records) + 2 30-60 % full (at least room for 2 records) + 3 60-90 % full (at least room for one record) + 4 100 % full (no more room for records) + 5 Tail page, 0-40 % full + 6 Tail page, 40-80 % full + 7 Full tail page or full blob page + + Assuming 8K pages, this will allow us to map: + 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M + + Note that values 1-3 may be adjust for each individual table based on + 'min record length'. Tail pages are for overflow data which can be of + any size and thus doesn't have to be adjusted for different tables. + If we add more columns to the table, some of the originally calculated + 'cut off' points may not be optimal, but they shouldn't be 'drasticly + wrong'. + + When allocating data from the bitmap, we are trying to do it in a + 'best fit' manner. Blobs and varchar blocks are given out in large + continuous extents to allow fast access to these. Before allowing a + row to 'flow over' to other blocks, we will compact the page and use + all space on it. If there is many rows in the page, we will ensure + there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other + rows to grow. + + The bitmap format allows us to extend the row file in big chunks, if needed. + + When calculating the size for a packed row, we will calculate the following + things separately: + - Row header + null_bits + empty_bits fixed size segments etc. + - Size of all char/varchar fields + - Size of each blob field + + The bitmap handler will get all the above information and return + either one page or a set of pages to put the different parts. + + Bitmaps are read on demand in response to insert/delete/update operations. + The following bitmap pointers will be cached and stored on disk on close: + - Current insert_bitmap; When inserting new data we will first try to + fill this one. + - First bitmap which is not completely full. This is updated when we + free data with an update or delete. + + While flushing out bitmaps, we will cache the status of the bitmap in memory + to avoid having to read a bitmap for insert of new data that will not + be of any use + - Total empty space + - Largest number of continuous pages + + Bitmap ONLY goes to disk in the following scenarios + - The file is closed (and we flush all changes to disk) + - On checkpoint + (Ie: When we do a checkpoint, we have to ensure that all bitmaps are + put on disk even if they are not in the page cache). + - When explicitely requested (for example on backup or after recvoery, + to simplify things) + +*/ + +#include "maria_def.h" +#include "ma_blockrec.h" + +/* Number of pages to store blob parts */ +#define BLOB_SEGMENT_MIN_SIZE 128 + +#define FULL_HEAD_PAGE 4 +#define FULL_TAIL_PAGE 7 + +static inline my_bool write_changed_bitmap(MARIA_SHARE *share, + MARIA_FILE_BITMAP *bitmap) +{ + return (key_cache_write(share->key_cache, + bitmap->file, bitmap->page * bitmap->block_size, 0, + (byte*) bitmap->map, + bitmap->block_size, bitmap->block_size, 1)); +} + +/* + Initialize bitmap. This is called the first time a file is opened +*/ + +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) +{ + uint aligned_bit_blocks; + uint max_page_size; + MARIA_FILE_BITMAP *bitmap= &share->bitmap; + uint size= share->block_size; +#ifndef DBUG_OFF + /* We want to have a copy of the bitmap to be able to print differences */ + size*= 2; +#endif + + if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME)))) + return 1; + + bitmap->file= file; + bitmap->changed= 0; + bitmap->block_size= share->block_size; + /* Size needs to be alligned on 6 */ + aligned_bit_blocks= share->block_size / 6; + bitmap->total_size= aligned_bit_blocks * 6; + /* + In each 6 bytes, we have 6*8/3 = 16 pages covered + The +1 is to add the bitmap page, as this doesn't have to be covered + */ + bitmap->pages_covered= aligned_bit_blocks * 16 + 1; + + /* Update size for bits */ + /* TODO; Make this dependent of the row size */ + max_page_size= share->block_size - PAGE_OVERHEAD_SIZE; + bitmap->sizes[0]= max_page_size; /* Empty page */ + bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; + bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; + bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100; + bitmap->sizes[4]= 0; /* Full page */ + bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100; + bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100; + bitmap->sizes[7]= 0; + + pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW); + + /* + Start by reading first page (assume table scan) + Later code is simpler if it can assume we always have an active bitmap. + */ + if (_ma_read_bitmap_page(share, bitmap, (ulonglong) 0)) + return(1); + return 0; +} + + +/* + Free data allocated by _ma_bitmap_init +*/ + +my_bool _ma_bitmap_end(MARIA_SHARE *share) +{ + my_bool res= 0; + _ma_flush_bitmap(share); + pthread_mutex_destroy(&share->bitmap.bitmap_lock); + my_free((byte*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR)); + return res; +} + + +/* + Flush bitmap to disk +*/ + +my_bool _ma_flush_bitmap(MARIA_SHARE *share) +{ + my_bool res= 0; + if (share->bitmap.changed) + { + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.changed) + { + res= write_changed_bitmap(share, &share->bitmap); + share->bitmap.changed= 0; + } + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + } + return res; +} + + +/* + Return bitmap pattern for the smallest head block that can hold 'size' + + SYNOPSIS + size_to_head_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0-3 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[3]) + return 3; + if (size <= bitmap->sizes[2]) + return 2; + if (size <= bitmap->sizes[1]) + return 1; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +/* + Return bitmap pattern for block where there is size bytes free +*/ + +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size < bitmap->sizes[3]) + return 4; + if (size < bitmap->sizes[2]) + return 3; + if (size < bitmap->sizes[1]) + return 2; + return (size < bitmap->sizes[0]) ? 1 : 0; +} + + +/* + Return bitmap pattern for the smallest tail block that can hold 'size' + + SYNOPSIS + size_to_tail_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0, 5 or 6 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[6]) + return 6; + if (size <= bitmap->sizes[5]) + return 5; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size >= bitmap->sizes[0]) + return 0; /* Revert to empty page */ + if (size < bitmap->sizes[6]) + return 7; + if (size < bitmap->sizes[5]) + return 6; + return 5; +} + + +/* + Return size guranteed to be available on a page + + SYNOPSIS + pattern_to_head_size + bitmap Bitmap + pattern Pattern (0-7) + + RETURN + 0 - block_size +*/ + +static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) +{ + DBUG_ASSERT(pattern <= 7); + return bitmap->sizes[pattern]; +} + + +/* + Print bitmap for debugging +*/ + +#ifndef DBUG_OFF + +const char *bits_to_txt[]= +{ + "empty", "00-30% full", "30-60% full", "60-90% full", "full", + "tail 00-40 % full", "tail 40-80 % full", "tail/blob full" +}; + +static void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap) +{ + uchar *pos, *end, *org_pos; + ulong page; + + end= bitmap->map+ bitmap->used_size; + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n", + (ulong) bitmap->page); + + page= (ulong) bitmap->page+1; + for (pos= bitmap->map, org_pos= bitmap->map+bitmap->block_size ; pos < end ; + pos+= 6, org_pos+= 6) + { + ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ + ulonglong org_bits= uint6korr(org_pos); + uint i; + if (bits != org_bits) + { + for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3) + { + if ((bits & 7) != (org_bits & 7)) + fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i, + bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]); + } + } + page+= 16; + } + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; + memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size); +} + +#endif /* DBUG_OFF */ + + +/*************************************************************************** + Reading & writing bitmap pages +***************************************************************************/ + +/* + Read a given bitmap page + + SYNOPSIS + read_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Page to read + + TODO + Update 'bitmap->used_size' to real size of used bitmap + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + my_off_t position= page * bitmap->block_size; + my_bool res; + DBUG_ENTER("_ma_read_bitmap_page"); + DBUG_ASSERT(page % bitmap->pages_covered == 0); + + bitmap->page= page; + if (position >= share->state.state.data_file_length) + { + share->state.state.data_file_length= position + bitmap->block_size; + bzero(bitmap->map, bitmap->block_size); + bitmap->used_size= 0; + DBUG_RETURN(0); + } + bitmap->used_size= bitmap->total_size; + res= key_cache_read(share->key_cache, + bitmap->file, position, 0, + (byte*) bitmap->map, + bitmap->block_size, bitmap->block_size, 0) == 0; +#ifndef DBUG_OFF + if (!res) + memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size); +#endif + DBUG_RETURN(res); +} + + +/* + Change to another bitmap page + + SYNOPSIS + _ma_change_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Bitmap page to read + + NOTES + If old bitmap was changed, write it out before reading new one + We return empty bitmap if page is outside of file size + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +static my_bool _ma_change_bitmap_page(MARIA_HA *info, + MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + DBUG_ENTER("_ma_change_bitmap_page"); + DBUG_ASSERT(page % bitmap->pages_covered == 0); + + if (bitmap->changed) + { + if (write_changed_bitmap(info->s, bitmap)) + DBUG_RETURN(1); + bitmap->changed= 0; + } + DBUG_RETURN(_ma_read_bitmap_page(info->s, bitmap, page)); +} + + +/* + Read next suitable bitmap + + SYNOPSIS + move_to_next_bitmap() + bitmap Bitmap handle + + TODO + Add cache of bitmaps to not read something that is not usable + + RETURN + 0 ok + 1 error (either couldn't save old bitmap or read new one +*/ + +static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) +{ + ulonglong page= bitmap->page; + MARIA_STATE_INFO *state= &info->s->state; + DBUG_ENTER("move_to_next_bitmap"); + + if (state->first_bitmap_with_space != ~(ulonglong) 0 && + state->first_bitmap_with_space != page) + { + page= state->first_bitmap_with_space; + state->first_bitmap_with_space= ~(ulonglong) 0; + } + else + page+= bitmap->pages_covered; + DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); +} + + +/**************************************************************************** + Allocate data in bitmaps +****************************************************************************/ + +/* + Store data in 'block' and mark the place used in the bitmap + + SYNOPSIS + fill_block() + bitmap Bitmap handle + block Store data about what we found + best_data Pointer to best 6 byte aligned area in bitmap->map + best_pos Which bit in *best_data the area starts + 0 = first bit pattern, 1 second bit pattern etc + fill_pattern Bitmap pattern to store in best_data[best_pos] +*/ + +static void fill_block(MARIA_FILE_BITMAP *bitmap, + MARIA_BITMAP_BLOCK *block, + uchar *best_data, uint best_pos, uint best_bits, + uint fill_pattern) +{ + uint page, offset, tmp; + uchar *data; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= (best_data - bitmap->map) / 6 * 16 + best_pos; + block->page= bitmap->page + 1 + page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= pattern_to_size(bitmap, best_bits); + block->sub_blocks= 1; + block->org_bitmap_value= best_bits; + block->used= BLOCKUSED_TAIL; + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + best_pos*= 3; + data= best_data+ best_pos / 8; + offset= best_pos & 7; + tmp= uint2korr(data); + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Allocate data for head block + + SYNOPSIS + allocate_head() + bitmap bitmap + size Size of block we need to find + block Store found information here + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_head_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_head"); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + anything matching the following pattern as this will be either + a full page or a tail page + */ + if ((!bits && best_data) || + ((bits & LL(04444444444444444)) == LL(04444444444444444))) + continue; + for (i= 0; i < 16 ; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits) + { + if (pattern == min_bits) + { + /* Found perfect match */ + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_data= data; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for tail block + + SYNOPSIS + allocate_tail() + bitmap bitmap + size Size of block we need to find + block Store found information here + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_tail_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_tail"); + DBUG_PRINT("enter", ("size: %u", size)); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + the following patterns: 1-4 or 7 + */ + + if ((!bits && best_data) || bits == LL(0xffffffffffff)) + continue; + for (i= 0; i < 16; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits && (!pattern || pattern >= 5)) + { + if (pattern == min_bits) + { + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for full blocks + + SYNOPSIS + allocate_full_pages() + bitmap bitmap + pages_needed Total size in pages (bitmap->total_size) we would like to have + block Store found information here + full_page 1 if we are not allowed to split extent + + IMPLEMENTATION + We will return the smallest area >= size. If there is no such + block, we will return the biggest area that satisfies + area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size) + + To speed up searches, we will only consider areas that has at least 16 free + pages starting on an even boundary. When finding such an area, we will + extend it with all previous and following free pages. This will ensure + we don't get holes between areas + + RETURN + # Blocks used + 0 error (no space in bitmap; block is not touched) +*/ + +static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, + ulong pages_needed, + MARIA_BITMAP_BLOCK *block, my_bool full_page) +{ + uchar *data= bitmap->map, *data_end= data + bitmap->used_size; + uchar *page_end= data + bitmap->total_size; + uchar *best_data= 0; + uint min_size; + uint best_area_size, best_prefix_area_size, best_suffix_area_size; + uint page, size; + ulonglong best_prefix_bits; + DBUG_ENTER("allocate_full_pages"); + DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed)); + + /* Following variables are only used if best_data is set */ + LINT_INIT(best_prefix_bits); + LINT_INIT(best_prefix_area_size); + LINT_INIT(best_suffix_area_size); + + min_size= pages_needed; + if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE) + min_size= BLOB_SEGMENT_MIN_SIZE; + best_area_size= ~(uint) 0; + + for (; data < page_end; data+= 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uchar *data_start; + ulonglong prefix_bits= 0; + uint area_size, prefix_area_size, suffix_area_size; + + /* Find area with at least 16 free pages */ + if (bits) + continue; + data_start= data; + /* Find size of area */ + for (data+=6 ; data < data_end ; data+= 6) + { + if ((bits= uint6korr(data))) + break; + } + area_size= (data - data_start) / 6 * 16; + if (area_size >= best_area_size) + continue; + prefix_area_size= suffix_area_size= 0; + if (!bits) + { + /* + End of page; All the rest of the bits on page are part of area + This is needed because bitmap->used_size only covers the set bits + in the bitmap. + */ + area_size+= (page_end - data) / 6 * 16; + if (area_size >= best_area_size) + break; + data= page_end; + } + else + { + /* Add bits at end of page */ + for (; !(bits & 7); bits >>= 3) + suffix_area_size++; + area_size+= suffix_area_size; + } + if (data_start != bitmap->map) + { + /* Add bits before page */ + bits= prefix_bits= uint6korr(data_start - 6); + DBUG_ASSERT(bits != 0); + /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */ + if (!(bits & LL(07000000000000000))) + { + data_start-= 6; + do + { + prefix_area_size++; + bits<<= 3; + } while (!(bits & LL(07000000000000000))); + area_size+= prefix_area_size; + /* Calculate offset to page from data_start */ + prefix_area_size= 16 - prefix_area_size; + } + } + if (area_size >= min_size && area_size <= best_area_size) + { + best_data= data_start; + best_area_size= area_size; + best_prefix_bits= prefix_bits; + best_prefix_area_size= prefix_area_size; + best_suffix_area_size= suffix_area_size; + + /* Prefer to put data in biggest possible area */ + if (area_size <= pages_needed) + min_size= area_size; + else + min_size= pages_needed; + } + } + if (!best_data) + DBUG_RETURN(0); /* No room on page */ + + /* + Now allocate min(pages_needed, area_size), starting from + best_start + best_prefix_area_size + */ + if (best_area_size > pages_needed) + best_area_size= pages_needed; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= ((best_data - bitmap->map) * 8) / 3 + best_prefix_area_size; + block->page= bitmap->page + 1 + page; + block->page_count= best_area_size; + block->empty_space= 0; + block->sub_blocks= 1; + block->org_bitmap_value= 0; + block->used= 0; + DBUG_PRINT("info", ("page: %lu page_count: %u", + (ulong) block->page, block->page_count)); + + if (best_prefix_area_size) + { + ulonglong tmp; + /* Convert offset back to bits */ + best_prefix_area_size= 16 - best_prefix_area_size; + if (best_area_size < best_prefix_area_size) + { + tmp= (LL(1) << best_area_size*3) - 1; + best_area_size= best_prefix_area_size; /* for easy end test */ + } + else + tmp= (LL(1) << best_prefix_area_size*3) - 1; + tmp<<= (16 - best_prefix_area_size) * 3; + DBUG_ASSERT((best_prefix_bits & tmp) == 0); + best_prefix_bits|= tmp; + int6store(best_data, best_prefix_bits); + if (!(best_area_size-= best_prefix_area_size)) + { + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); + } + best_data+= 6; + } + best_area_size*= 3; /* Bits to set */ + size= best_area_size/8; /* Bytes to set */ + bfill(best_data, size, 255); + best_data+= size; + if ((best_area_size-= size * 8)) + { + /* fill last byte */ + *best_data|= (uchar) ((1 << best_area_size) -1); + best_data++; + } + if (data_end < best_data) + bitmap->used_size= (uint) (best_data - bitmap->map); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); +} + + +/**************************************************************************** + Find right bitmaps where to store data +****************************************************************************/ + +/* + Find right bitmap and position for head block + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_head(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + /* There is always place for head blocks in bitmap_blocks */ + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_head(bitmap, length, block)) + if (move_to_next_bitmap(info, bitmap)) + return 1; + return 0; +} + + +/* + Find right bitmap and position for tail + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_tail(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + DBUG_ENTER("find_tail"); + + /* Needed, as there is no error checking in dynamic_element */ + if (allocate_dynamic(&info->bitmap_blocks, position)) + DBUG_RETURN(1); + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_tail(bitmap, length, block)) + if (move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Find right bitmap and position for full blocks in one extent + + NOTES + This is used to allocate the main extent after the 'head' block + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_full_pages(bitmap, pages, block, 1)) + { + if (move_to_next_bitmap(info, bitmap)) + return 1; + } + return 0; +} + + +/* + Find right bitmap and position for putting a blob + + NOTES + The extents are stored last in info->bitmap_blocks + + IMPLEMENTATION + Allocate all full pages for the block + optionally one tail + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_blob(MARIA_HA *info, ulong length) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint full_page_size= FULL_PAGE_SIZE(info->s->block_size); + ulong pages; + uint rest_length, used; + uint first_block_pos; + MARIA_BITMAP_BLOCK *first_block= 0; + DBUG_ENTER("find_blob"); + DBUG_PRINT("enter", ("length: %lu", length)); + + pages= length / full_page_size; + rest_length= (uint) (length - pages * full_page_size); + if (rest_length >= MAX_TAIL_SIZE(info->s->block_size)) + { + pages++; + rest_length= 0; + } + + if (pages) + { + MARIA_BITMAP_BLOCK *block; + if (allocate_dynamic(&info->bitmap_blocks, + info->bitmap_blocks.elements + + pages / BLOB_SEGMENT_MIN_SIZE + 2)) + DBUG_RETURN(1); + first_block_pos= info->bitmap_blocks.elements; + block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements, + MARIA_BITMAP_BLOCK*); + first_block= block; + do + { + used= allocate_full_pages(bitmap, + (pages >= 65535 ? 65535 : (uint) pages), block, + 0); + if (!used && move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + info->bitmap_blocks.elements++; + block++; + } while ((pages-= used) != 0); + } + if (rest_length && find_tail(info, rest_length, + info->bitmap_blocks.elements++)) + DBUG_RETURN(1); + if (first_block) + first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos; + DBUG_RETURN(0); +} + + +static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row) +{ + ulong *length, *end; + uint elements; + /* + Reserve size for: + head block + one extent + tail block + */ + elements= info->bitmap_blocks.elements; + for (length= row->blob_lengths, end= length + info->s->base.blobs; + length < end; length++) + { + if (*length && find_blob(info, *length)) + return 1; + } + row->extents_count= (info->bitmap_blocks.elements - elements); + return 0; +} + + +static void use_head(MARIA_HA *info, ulonglong page, uint size, + uint block_position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + uchar *data; + uint offset, tmp, offset_page; + + block= dynamic_element(&info->bitmap_blocks, block_position, + MARIA_BITMAP_BLOCK*); + block->page= page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= size; + block->sub_blocks= 1; + block->used= BLOCKUSED_TAIL; + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page= (uint) (page - bitmap->page - 1) * 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + block->org_bitmap_value= (tmp >> offset) & 7; + tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Find out where to split the row; +*/ + +static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, + uint extents_length, uint split_size) +{ + uint row_length= row->base_length; + uint *lengths, *lengths_end; + + DBUG_ASSERT(row_length < split_size); + /* + Store first in all_field_lengths the different parts that are written + to the row. This needs to be in same order as in + ma_block_rec.c::write_block_record() + */ + row->null_field_lengths[-3]= extents_length; + row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; + row->null_field_lengths[-1]= row->field_lengths_length; + for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, + lengths_end= (lengths + share->base.pack_fields - share->base.blobs + + EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) + { + if (row_length + *lengths > split_size) + break; + row_length+= *lengths; + } + return row_length; +} + + +static my_bool write_rest_of_head(MARIA_HA *info, uint position, + ulong rest_length) +{ + MARIA_SHARE *share= info->s; + uint full_page_size= FULL_PAGE_SIZE(share->block_size); + MARIA_BITMAP_BLOCK *block; + + if (position == 0) + { + /* Write out full pages */ + uint pages= rest_length / full_page_size; + + rest_length%= full_page_size; + if (rest_length >= MAX_TAIL_SIZE(share->block_size)) + { + /* Put tail on a full page */ + pages++; + rest_length= 0; + } + if (find_mid(info, rest_length / full_page_size, 1)) + return 1; + /* + Insert empty block after full pages, to allow write_block_record() to + split segment into used + free page + */ + block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*); + block->page_count= 0; + block->used= 0; + } + if (rest_length) + { + if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1)) + return 1; + } + else + { + /* Empty tail block */ + block= dynamic_element(&info->bitmap_blocks, + ELEMENTS_RESERVED_FOR_MAIN_PART - 1, + MARIA_BITMAP_BLOCK *); + block->page_count= 0; + block->used= 0; + } + return 0; +} + + +/* + Find where to store one row + + SYNPOSIS + _ma_bitmap_find_place() + info Maria handler + row Information about row to write + blocks Store data about allocated places here + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position, max_page_size; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + /* + Reserver place for the following blocks: + - Head block + - Full page block + - Marker block to allow write_block_record() to split full page blocks + into full and free part + - Tail block + */ + + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE); + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + + if (row->total_length <= max_page_size) + { + /* Row fits in one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, (uint) row->total_length, position)) + goto abort; + goto end; + } + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. + */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= max_page_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, head_length, position)) + goto abort; + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, + max_page_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + if (find_head(info, row_length, position)) + goto abort; + rest_length= head_length - row_length; + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Find where to put row on update (when head page is already defined) + + SYNPOSIS + _ma_bitmap_find_new_place() + info Maria handler + row Information about row to write + page On which page original row was stored + free_size Free size on head page + blocks Store data about allocated places here + + NOTES + This function is only called when the new row can't fit in the space of + the old row in the head page. + + This is essently same as _ma_bitmap_find_place() except that + we don't call find_head() to search in bitmaps where to put the page. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_new_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.page != page / share->bitmap.pages_covered && + _ma_change_bitmap_page(info, &share->bitmap, + page / share->bitmap.pages_covered)) + goto abort; + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. + */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= free_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + use_head(info, page, head_length, position); + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, free_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + use_head(info, page, row_length, position); + rest_length= head_length - row_length; + + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/**************************************************************************** + Clear and reset bits +****************************************************************************/ + +static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint fill_pattern) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp, org_tmp; + uchar *data; + DBUG_ENTER("set_page_bits"); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + org_tmp= tmp= uint2korr(data); + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + if (tmp == org_tmp) + DBUG_RETURN(0); /* No changes */ + int2store(data, tmp); + + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + if (fill_pattern != 3 && fill_pattern != 7 && + page < info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= page; + DBUG_RETURN(0); +} + + +/* + Get bitmap pattern for a given page + + SYNOPSIS + + get_page_bits() + info Maria handler + bitmap Bitmap handler + page Page number + + RETURN + 0-7 Bitmap pattern + ~0 Error (couldn't read page) +*/ + +static uint get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp; + uchar *data; + DBUG_ENTER("get_page_bits"); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(~ (uint) 0); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + DBUG_RETURN((tmp >> offset) & 7); +} + + +/* + Mark all pages in a region as free + + SYNOPSIS + reset_full_page_bits() + info Maria handler + bitmap Bitmap handler + page Start page + page_count Number of pages + + NOTES + We assume that all pages in region is covered by same bitmap + One must have a lock on info->s->bitmap.bitmap_lock + + RETURN + 0 ok + 1 Error (when reading bitmap) +*/ + +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count) +{ + ulonglong bitmap_page; + uint offset, bit_start, bit_count, tmp; + uchar *data; + DBUG_ENTER("_ma_reset_full_page_bits"); + DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count)); + safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + page= page - bitmap->page - 1; + + /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */ + bit_start= page * 3; + bit_count= page_count * 3; + + data= bitmap->map + bit_start / 8; + offset= bit_start & 7; + + tmp= (255 << offset); /* Bits to keep */ + if (bit_count + offset < 8) + { + /* Only clear bits between 'offset' and 'offset+bit_count-1' */ + tmp^= (255 << (offset + bit_count)); + } + *data&= ~tmp; + + if ((int) (bit_count-= (8 - offset)) > 0) + { + uint fill; + data++; + /* + -1 is here to avoid one 'if' statement and to let the following code + handle the last byte + */ + if ((fill= (bit_count - 1) / 8)) + { + bzero(data, fill); + data+= fill; + } + bit_count-= fill * 8; /* Bits left to clear */ + tmp= (1 << bit_count) - 1; + *data&= ~tmp; + } + if (bitmap->page < (ulonglong) info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= bitmap->page; + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(0); +} + + +/* + Correct bitmap pages to reflect the true allocation + + SYNOPSIS + _ma_bitmap_release_unused() + info Maria handle + blocks Bitmap blocks + + IMPLEMENTATION + If block->used & BLOCKUSED_TAIL is set: + If block->used & BLOCKUSED_USED is set, then the bits for the + corresponding page is set according to block->empty_space + If block->used & BLOCKUSED_USED is not set, then the bits for + the corresponding page is set to org_bitmap_value; + + If block->used & BLOCKUSED_TAIL is not set: + if block->used is not set, the bits for the corresponding page are + cleared + + For the first block (head block) the logic is same as for a tail block + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count; + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits, current_bitmap_value; + DBUG_ENTER("_ma_bitmap_release_unused"); + + /* + We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full' + when we allocated space in the page + */ + current_bitmap_value= FULL_HEAD_PAGE; + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + + /* First handle head block */ + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("head empty_space: %u", block->empty_space)); + bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space); + if (block->used & BLOCKUSED_USE_ORG_BITMAP) + current_bitmap_value= block->org_bitmap_value; + } + else + bits= block->org_bitmap_value; + if (bits != current_bitmap_value && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + + + /* Handle all full pages and tail pages (for head page and blob) */ + for (block++; block < end; block++) + { + if (block->used & BLOCKUSED_TAIL) + { + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("tail empty_space: %u", block->empty_space)); + bits= free_size_to_tail_pattern(bitmap, block->empty_space); + } + else + bits= block->org_bitmap_value; + if (bits != FULL_TAIL_PAGE && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + } + if (!(block->used & BLOCKUSED_USED) && + _ma_reset_full_page_bits(info, bitmap, + block->page, block->page_count)) + goto err; + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); + +err: + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); +} + + +/* + Free full pages from bitmap + + SYNOPSIS + _ma_bitmap_free_full_pages() + info Maria handle + extents Extents (as stored on disk) + count Number of extents + + IMPLEMENTATION + Mark all full pages (not tails) from extents as free + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count) +{ + DBUG_ENTER("_ma_bitmap_free_full_pages"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + for (; count--; extents += ROW_EXTENT_SIZE) + { + ulonglong page= uint5korr(extents); + uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE); + if (!(page_count & TAIL_BIT)) + { + if (_ma_reset_full_page_bits(info, &info->s->bitmap, page, page_count)) + { + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); + } + } + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); +} + + + +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head, + uint empty_space) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits; + my_bool res; + DBUG_ENTER("_ma_bitmap_set"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + bits= (head ? + _ma_free_size_to_head_pattern(bitmap, empty_space) : + free_size_to_tail_pattern(bitmap, empty_space)); + res= set_page_bits(info, bitmap, pos, bits); + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Check that bitmap pattern is correct for a page + + NOTES + Used in maria_chk + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern) +{ + uint bits; + switch (page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + bits= 0; + break; + case HEAD_PAGE: + bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space); + break; + case TAIL_PAGE: + bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space); + break; + case BLOB_PAGE: + bits= FULL_TAIL_PAGE; + break; + } + return (*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) != + bits; +} + + +/* + Check that bitmap pattern is correct for a page + + NOTES + Used in maria_chk + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern) +{ + if ((*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) > 7) + return 1; /* Couldn't read page */ + switch (page_type) { + case HEAD_PAGE: + return *bitmap_pattern < 1 || *bitmap_pattern > 4; + case TAIL_PAGE: + return *bitmap_pattern < 5; + case BLOB_PAGE: + return *bitmap_pattern != 7; + default: + break; + } + DBUG_ASSERT(0); + return 1; +} diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c new file mode 100644 index 00000000000..6eac151d76e --- /dev/null +++ b/storage/maria/ma_blockrec.c @@ -0,0 +1,2742 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block + + Maria will have a LSN at start of each page (including the bitmap page) + Maria will for each row have the additional information: + + TRANSID Transaction ID that last updated row (6 bytes) + VER_PTR Version pointer that points on the UNDO entry that + contains last version of the row versions (7 bytes) + + The different page types that are in a data file are: + + Bitmap pages Map of free pages in the next extent (8129 page size + gives us 256M of mapped pages / bitmap) + Head page Start of rows are stored on this page. + A rowid always points to a head page + Blob page This page is totally filled with data from one blob or by + a set of long VARCHAR/CHAR fields + Tail page This contains the last part from different rows, blobs + or varchar fields. + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. + + For information about the bitmap page, see ma_bitmap.c + + Structure of data and tail page: + + The page has a row directory at end of page to allow us to do deletes + without having to reorganize the page. It also allows us to store some + extra bytes after each row to allow them to grow without having to move + around other rows + + Page header: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 1 for head / 2 for tail / 3 for blob + NO 1 byte Number of row/tail entries on page + empty space 2 bytes Empty space on page + + The upmost bit in PAGE_TYPE is set to 1 if the data on the page + can be compacted to get more space. (PAGE_CAN_BE_COMPACTED) + + Row data + + Row directory of NO entires, that consist of the following for each row + (in reverse order; ie, first record is stored last): + + Position 2 bytes Position of row on page + Length 2 bytes Length of entry + + For Position and Length, the 1 upmost bit of the position and the 1 + upmost bit of the length could be used for some states of the row (in + other words, we should try to keep these reserved) + + eof flag 1 byte Reserved for full page read testing + + ---------------- + + Structure of blob pages: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 3 + + data + + ----------------- + + Row data structure: + + Flag 1 byte Marker of which header field exists + TRANSID 6 bytes TRANSID of changing transaction + (optional, added on insert and first + update/delete) + VER_PTR 7 bytes Pointer to older version in log + (undo record) + (optional, added after first + update/delete) + DELETE_TRANSID 6 bytes (optional). TRANSID of original row. + Added on delete. + Nulls_extended 1 byte To allow us to add new DEFAULT NULL + fields (optional, added after first + change of row after alter table) + Number of ROW_EXTENT's 1-3 byte Length encoded, optional + This is the number of extents the + row is split into + First row_extent 7 byte Pointer to first row extent (optional) + + Total length of length array 1-3 byte Only used if we have + char/varchar/blob fields. + Row checksum 1 byte Only if table created with checksums + Null_bits .. One bit for each NULL field + Empty_bits .. One bit for each NOT NULL field. This bit is + 0 if the value is 0 or empty string. + + field_offsets 2 byte/offset + For each 32 field, there is one offset that + points to where the field information starts + in the block. This is to provide fast access + to later field in the row when we only need + to return a small set of fields. + + Things marked above as 'optional' will only be present if the corresponding + bit is set in 'Flag' field. + + Data in the following order: + (Field order is precalculated when table is created) + + Critical fixed length, not null, fields. (Note, these can't be dropped) + Fixed length, null fields + + Length array, 1-4 byte per field for all CHAR/VARCHAR/BLOB fields. + Number of bytes used in length array per entry is depending on max length + for field. + + ROW_EXTENT's + CHAR data (space stripped) + VARCHAR data + BLOB data + + Fields marked in null_bits or empty_bits are not stored in data part or + length array. + + If row doesn't fit into the given block, then the first EXTENT will be + stored last on the row. This is done so that we don't break any field + data in the middle. + + We first try to store the full row into one block. If that's not possible + we move out each big blob into their own extents. If this is not enough we + move out a concatenation of all varchars to their own extent. + + Each blob and the concatenated char/varchar fields are stored the following + way: + - Store the parts in as many full-contiguous pages as possible. + - The last part, that doesn't fill a full page, is stored in tail page. + + When doing an insert of a new row, we don't have to have + VER_PTR in the row. This will make rows that are not changed stored + efficiently. On update and delete we would add TRANSID (if it was an old + committed row) and VER_PTR to + the row. On row page compaction we can easily detect rows where + TRANSID was committed before the the longest running transaction + started and we can then delete TRANSID and VER_PTR from the row to + gain more space. + + If a row is deleted in Maria, we change TRANSID to current transid and + change VER_PTR to point to the undo record for the delete. The undo + record must contain the original TRANSID, so that another transaction + can use this to check if they should use the found row or go to the + previous row pointed to by the VER_PTR in the undo row. + + Description of the different parts: + + Flag is coded as: + + Description bit + TRANS_ID_exists 0 + VER_PTR_exists 1 + Row is deleted 2 (Means that DELETE_TRANSID exists) + Nulls_extended_exists 3 + Row is split 7 This means that 'Number_of_row_extents' exists + + + This would be a way to get more space on a page when doing page + compaction as we don't need to store TRANSID that have committed + before the smallest running transaction we have in memory. + + Nulls_extended is the number of new DEFAULT NULL fields in the row + compared to the number of DEFAULT NULL fields when the first version + of the table was created. If Nulls_extended doesn't exist in the row, + we know it's 0 as this must be one of the original rows from when the + table was created first time. This coding allows us to add 255*8 = + 2048 new fields without requiring a full alter table. + + Empty_bits is used to allow us to store 0, 0.0, empty string, empty + varstring and empty blob efficiently. (This is very good for data + warehousing where NULL's are often regarded as evil). Having this + bitmap also allows us to drop information of a field during a future + delete if field was deleted with ALTER TABLE DROP COLUMN. To be able + to handle DROP COLUMN, we must store in the index header the fields + that has been dropped. When unpacking a row we will ignore dropped + fields. When storing a row, we will mark a dropped field either with a + null in the null bit map or in the empty_bits and not store any data + for it. + + One ROW_EXTENT is coded as: + + START_PAGE 5 bytes + PAGE_COUNT 2 bytes. High bit is used to indicate tail page/ + end of blob + With 8K pages, we can cover 256M in one extent. This coding gives us a + maximum file size of 2^40*8192 = 8192 tera + + As an example of ROW_EXTENT handling, assume a row with one integer + field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2 + big BLOB fields that we have updated. + + The record format for storing this into an empty file would be: + + Page 1: + + 00 00 00 00 00 00 00 LSN + 01 Only one row in page + xx xx Empty space on page + + 10 Flag: row split, VER_PTR exists + 01 00 00 00 00 00 TRANSID 1 + 00 00 00 00 00 01 00 VER_PTR to first block in LOG file 1 + 5 Number of row extents + 02 00 00 00 00 03 00 VARCHAR's are stored in full pages 2,3,4 + 0 No null fields + 0 No empty fields + 05 00 00 00 00 00 80 Tail page for VARCHAR, rowid 0 + 06 00 00 00 00 80 00 First blob, stored at page 6-133 + 05 00 00 00 00 01 80 Tail of first blob (896 bytes) at page 5 + 86 00 00 00 00 80 00 Second blob, stored at page 134-262 + 05 00 00 00 00 02 80 Tail of second blob (896 bytes) at page 5 + 05 00 5 integer + FA Length of first varchar field (size 250) + 00 60 Length of second varchar field (size 8192*3) + 00 60 10 First medium BLOB, 1M + 01 00 10 00 Second BLOB, 1M + xx xx xx xx xx xx Varchars are stored here until end of page + + ..... until end of page + + 09 00 F4 1F 00 (Start position 9, length 8180, end byte) +*/ + +#define SANITY_CHECKS + +#include "maria_def.h" +#include "ma_blockrec.h" + +typedef struct st_maria_extent_cursor +{ + byte *extent; + byte *data_start; /* For error checking */ + MARIA_RECORD_POS *tail_positions; + my_off_t page; + uint extent_count, page_count; + uint tail; /* <> 0 if current extent is a tail page */ + my_bool first_extent; +} MARIA_EXTENT_CURSOR; + + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head); +static void _ma_print_directory(byte *buff, uint block_size); + +/**************************************************************************** + Initialization +****************************************************************************/ + +/* + Initialize data needed for block structures +*/ + + +/* Size of the different header elements for a row */ + +static uchar header_sizes[]= +{ + TRANSID_SIZE, + VERPTR_SIZE, + TRANSID_SIZE, /* Delete transid */ + 1 /* Null extends */ +}; + +/* + Calculate array of all used headers + + Used to speed up: + + size= 1; + if (flag & 1) + size+= TRANSID_SIZE; + if (flag & 2) + size+= VERPTR_SIZE; + if (flag & 4) + size+= TRANSID_SIZE + if (flag & 8) + size+= 1; + + NOTES + This is called only once at startup of Maria +*/ + +static uchar total_header_size[1 << array_elements(header_sizes)]; +#define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1) + +void _ma_init_block_record_data(void) +{ + uint i; + bzero(total_header_size, sizeof(total_header_size)); + total_header_size[0]= FLAG_SIZE; /* Flag byte */ + for (i= 1; i < array_elements(total_header_size); i++) + { + uint size= FLAG_SIZE, j, bit; + for (j= 0; (bit= (1 << j)) <= i; j++) + { + if (i & bit) + size+= header_sizes[j]; + } + total_header_size[i]= size; + } +} + + +my_bool _ma_once_init_block_row(MARIA_SHARE *share, File data_file) +{ + + share->base.max_data_file_length= + (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) * + share->block_size; +#if SIZEOF_OFF_T == 4 + set_if_smaller(max_data_file_length, INT_MAX32); +#endif + return _ma_bitmap_init(share, data_file); +} + + +my_bool _ma_once_end_block_row(MARIA_SHARE *share) +{ + int res= _ma_bitmap_end(share); + if (flush_key_blocks(share->key_cache, share->bitmap.file, + share->temporary ? FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + res= 1; + if (share->bitmap.file >= 0 && my_close(share->bitmap.file, MYF(MY_WME))) + res= 1; + return res; +} + + +/* Init info->cur_row structure */ + +my_bool _ma_init_block_row(MARIA_HA *info) +{ + MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row; + DBUG_ENTER("_ma_init_block_row"); + + if (!my_multi_malloc(MY_WME, + &row->empty_bits_buffer, info->s->base.pack_bytes, + &row->field_lengths, info->s->base.max_field_lengths, + &row->blob_lengths, sizeof(ulong) * info->s->base.blobs, + &row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs)), + &row->tail_positions, (sizeof(MARIA_RECORD_POS) * + (info->s->base.blobs + 2)), + &new_row->empty_bits_buffer, info->s->base.pack_bytes, + &new_row->field_lengths, + info->s->base.max_field_lengths, + &new_row->blob_lengths, + sizeof(ulong) * info->s->base.blobs, + &new_row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs)), + NullS, 0)) + DBUG_RETURN(1); + if (my_init_dynamic_array(&info->bitmap_blocks, + sizeof(MARIA_BITMAP_BLOCK), + ELEMENTS_RESERVED_FOR_MAIN_PART, 16)) + my_free((char*) &info->bitmap_blocks, MYF(0)); + row->base_length= new_row->base_length= info->s->base_length; + DBUG_RETURN(0); +} + +void _ma_end_block_row(MARIA_HA *info) +{ + DBUG_ENTER("_ma_end_block_row"); + my_free((gptr) info->cur_row.empty_bits_buffer, MYF(MY_ALLOW_ZERO_PTR)); + delete_dynamic(&info->bitmap_blocks); + my_free((gptr) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + Helper functions +****************************************************************************/ + +static inline uint empty_pos_after_row(byte *dir) +{ + byte *prev; + /* + Find previous used entry. (There is always a previous entry as + the directory never starts with a deleted entry) + */ + for (prev= dir - DIR_ENTRY_SIZE ; + prev[0] == 0 && prev[1] == 0 ; + prev-= DIR_ENTRY_SIZE) + {} + return (uint) uint2korr(prev); +} + + +static my_bool check_if_zero(byte *pos, uint length) +{ + byte *end; + for (end= pos+ length; pos != end ; pos++) + if (pos[0] != 0) + return 1; + return 0; +} + + +/* + Find free postion in directory + + SYNOPSIS + find_free_position() + buff Page + block_size Size of page + res_rownr Store index to free position here + res_length Store length of found segment here + empty_space Store length of empty space on disk here. This is + all empty space, including the found block. + + NOTES + If there is a free directory entry (entry with postion == 0), + then use it and change it to be the size of the empty block + after the previous entry. This guarantees that all row entries + are stored on disk in inverse directory order, which makes life easier for + 'compact_page()' and to know if there is free space after any block. + + If there is no free entry (entry with postion == 0), then we create + a new one. + + We will update the offset and the length of the found dir entry to + match the position and empty space found. + + buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller + + RETURN + 0 Error (directory full) + # Pointer to directory entry on page +*/ + +static byte *find_free_position(byte *buff, uint block_size, uint *res_rownr, + uint *res_length, uint *empty_space) +{ + uint max_entry= (uint) ((uchar*) buff)[DIR_ENTRY_OFFSET]; + uint entry, length, first_pos; + byte *dir, *end; + + dir= (buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE); + end= buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + first_pos= PAGE_HEADER_SIZE; + *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + /* Search after first empty position */ + for (entry= 0 ; dir <= end ; end-= DIR_ENTRY_SIZE, entry--) + { + if (end[0] == 0 && end[1] == 0) /* Found not used entry */ + { + length= empty_pos_after_row(end) - first_pos; + int2store(end, first_pos); /* Update dir entry */ + int2store(end + 2, length); + *res_rownr= entry; + *res_length= length; + return end; + } + first_pos= uint2korr(end) + uint2korr(end + 2); + } + /* No empty places in dir; create a new one */ + if (max_entry == MAX_ROWS_PER_PAGE) + return 0; + buff[DIR_ENTRY_OFFSET]= (byte) (uchar) max_entry+1; + dir-= DIR_ENTRY_SIZE; + length= (uint) (dir - buff - first_pos); + DBUG_ASSERT(length <= *empty_space - DIR_ENTRY_SIZE); + int2store(dir, first_pos); + int2store(dir+2, length); /* Current max length */ + *res_rownr= max_entry; + *res_length= length; + + /* Reduce directory entry size from free space size */ + (*empty_space)-= DIR_ENTRY_SIZE; + return dir; + +} + +/**************************************************************************** + Updating records +****************************************************************************/ + +/* + Calculate length of all the different field parts +*/ + +static void calc_record_size(MARIA_HA *info, const byte *record, + MARIA_ROW *row) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data; + MARIA_COLUMNDEF *rec, *end_field; + uint blob_count= 0, *null_field_lengths= row->null_field_lengths; + + row->normal_length= row->char_length= row->varchar_length= + row->blob_length= row->extents_count= 0; + + /* Create empty bitmap and calculate length of each varlength/char field */ + bzero(row->empty_bits_buffer, share->base.pack_bytes); + row->empty_bits= row->empty_bits_buffer; + field_length_data= row->field_lengths; + for (rec= share->rec + share->base.fixed_not_null_fields, + end_field= share->rec + share->base.fields; + rec < end_field; rec++, null_field_lengths++) + { + if ((record[rec->null_pos] & rec->null_bit)) + { + if (rec->type != FIELD_BLOB) + *null_field_lengths= 0; + continue; + } + switch ((enum en_fieldtype) rec->type) { + case FIELD_CHECK: + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_ZERO: + DBUG_ASSERT(rec->empty_bit == 0); + /* fall through */ + case FIELD_SKIP_PRESPACE: /* Not packed */ + row->normal_length+= rec->length; + *null_field_lengths= rec->length; + break; + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (memcmp(record+ rec->null_pos, maria_zero_string, + rec->length) == 0) + { + row->empty_bits[rec->empty_pos] |= rec->empty_bit; + *null_field_lengths= 0; + } + else + { + row->normal_length+= rec->length; + *null_field_lengths= rec->length; + } + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + const char *pos, *end; + for (pos= record + rec->offset, end= pos + rec->length; + end > pos && end[-1] == ' '; end--) + ; + if (pos == end) /* If empty string */ + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + *null_field_lengths= 0; + } + else + { + uint length= (end - pos); + if (rec->length <= 255) + *field_length_data++= (byte) (uchar) length; + else + { + int2store(field_length_data, length); + field_length_data+= 2; + } + row->char_length+= length; + *null_field_lengths= length; + } + break; + } + case FIELD_VARCHAR: + { + uint length; + const byte *field_pos= record + rec->offset; + /* 256 is correct as this includes the length byte */ + if (rec->length <= 256) + { + if (!(length= (uint) (uchar) *field_pos)) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + *null_field_lengths= 0; + break; + } + *field_length_data++= *field_pos; + } + else + { + if (!(length= uint2korr(field_pos))) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + break; + } + field_length_data[0]= field_pos[0]; + field_length_data[1]= field_pos[1]; + field_length_data+= 2; + } + row->varchar_length+= length; + *null_field_lengths= length; + break; + } + case FIELD_BLOB: + { + const byte *field_pos= record + rec->offset; + uint size_length= rec->length - maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_pos); + if (!blob_length) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + row->blob_lengths[blob_count++]= 0; + break; + } + row->blob_length+= blob_length; + row->blob_lengths[blob_count++]= blob_length; + memcpy(field_length_data, field_pos, size_length); + field_length_data+= size_length; + break; + } + default: + DBUG_ASSERT(0); + } + } + row->field_lengths_length= (uint) (field_length_data - row->field_lengths); + row->head_length= (row->base_length + + share->base.fixed_not_null_fields_length + + row->field_lengths_length + + size_to_store_key_length(row->field_lengths_length) + + row->normal_length + + row->char_length + row->varchar_length); + row->total_length= (row->head_length + row->blob_length); + if (row->total_length < share->base.min_row_length) + row->total_length= share->base.min_row_length; +} + + +/* + Compact page by removing all space between rows + + IMPLEMENTATION + Move up all rows to start of page. + Move blocks that are directly after each other with one memmove. + + TODO LATER + Remove TRANSID from rows that are visible to all transactions + + SYNOPSIS + compact_page() + buff Page to compact + block_size Size of page + recnr Put empty data after this row +*/ + + +void compact_page(byte *buff, uint block_size, uint rownr) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; + uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block; + byte *dir, *end; + DBUG_ENTER("compact_page"); + DBUG_PRINT("enter", ("rownr: %u", rownr)); + DBUG_ASSERT(max_entry > 0 && + max_entry < (block_size - PAGE_HEADER_SIZE - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE); + + /* Move all entries before and including rownr up to start of page */ + dir= buff + block_size - DIR_ENTRY_SIZE * (rownr+1) - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE; + diff= 0; + for (; dir <= end ; end-= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(end); + + if (offset) + { + uint row_length= uint2korr(end + 2); + DBUG_ASSERT(offset >= page_pos); + DBUG_ASSERT(buff + offset + row_length <= dir); + + if (offset != next_free_pos) + { + uint length= (next_free_pos - start_of_found_block); + /* + There was empty space before this and prev block + Check if we have to move prevous block up to page start + */ + if (page_pos != start_of_found_block) + { + /* move up previous block */ + memmove(buff + page_pos, buff + start_of_found_block, length); + } + page_pos+= length; + /* next continous block starts here */ + start_of_found_block= offset; + diff= offset - page_pos; + } + int2store(end, offset - diff); /* correct current pos */ + next_free_pos= offset + row_length; + } + } + if (page_pos != start_of_found_block) + { + uint length= (next_free_pos - start_of_found_block); + memmove(buff + page_pos, buff + start_of_found_block, length); + } + start_of_found_block= uint2korr(dir); + + if (rownr != max_entry - 1) + { + /* Move all entries after rownr to end of page */ + uint rownr_length; + next_free_pos= end_of_found_block= page_pos= + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + diff= 0; + /* End points to entry before 'rownr' */ + for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(dir); + uint row_length= uint2korr(dir + 2); + uint row_end= offset + row_length; + if (!offset) + continue; + DBUG_ASSERT(offset >= start_of_found_block && row_end <= next_free_pos); + + if (row_end != next_free_pos) + { + uint length= (end_of_found_block - next_free_pos); + if (page_pos != end_of_found_block) + { + /* move next block down */ + memmove(buff + page_pos - length, buff + next_free_pos, length); + } + page_pos-= length; + /* next continous block starts here */ + end_of_found_block= row_end; + diff= page_pos - row_end; + } + int2store(dir, offset + diff); /* correct current pos */ + next_free_pos= offset; + } + if (page_pos != end_of_found_block) + { + uint length= (end_of_found_block - next_free_pos); + memmove(buff + page_pos - length, buff + next_free_pos, length); + next_free_pos= page_pos- length; + } + /* Extend rownr block to cover hole */ + rownr_length= next_free_pos - start_of_found_block; + int2store(dir+2, rownr_length); + } + else + { + /* Extend last block cover whole page */ + uint length= (uint) (dir - buff) - start_of_found_block; + int2store(dir+2, length); + + buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED; + } + DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); + DBUG_VOID_RETURN; +} + + + +/* + Read or initialize new head or tail page + + SYNOPSIS + get_head_or_tail_page() + info Maria handler + block Block to read + buff Suggest this buffer to key cache + length Minimum space needed + page_type HEAD_PAGE || TAIL_PAGE + res Store result position here + + NOTES + We don't decremented buff[EMPTY_SPACE_OFFSET] with the allocated data + as we don't know how much data the caller will actually use. + + RETURN + 0 ok All slots in 'res' are updated + 1 error my_errno is set +*/ + +struct st_row_pos_info +{ + byte *buff; /* page buffer */ + byte *data; /* Place for data */ + byte *dir; /* Directory */ + uint length; /* Length for data */ + uint offset; /* Offset to directory */ + uint empty_space; /* Space left on page */ +}; + +static my_bool get_head_or_tail_page(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *buff, uint length, uint page_type, + struct st_row_pos_info *res) +{ + uint block_size; + DBUG_ENTER("get_head_or_tail_page"); + + block_size= info->s->block_size; + if (block->org_bitmap_value == 0) /* Empty block */ + { + /* New page */ + bzero(buff, PAGE_HEADER_SIZE); + + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. + The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE + length, + block_size - length - PAGE_HEADER_SIZE - DIR_ENTRY_SIZE - + PAGE_SUFFIX_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_ENTRY_OFFSET]= 1; + res->buff= buff; + res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); + res->data= (buff + PAGE_HEADER_SIZE); + res->dir= res->data + res->length; + /* Store poistion to the first row */ + int2store(res->dir, PAGE_HEADER_SIZE); + res->offset= 0; + DBUG_ASSERT(length <= res->length); + } + else + { + byte *dir; + /* Read old page */ + if (!(res->buff= key_cache_read(info->s->key_cache, + info->dfile, + (my_off_t) block->page * block_size, 0, + buff, block_size, block_size, 0))) + DBUG_RETURN(1); + DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type); + if (!(dir= find_free_position(buff, block_size, &res->offset, + &res->length, &res->empty_space))) + { + if (res->length < length) + { + if (res->empty_space + res->length < length) + { + compact_page(res->buff, block_size, res->offset); + /* All empty space are now after current position */ + res->length= res->empty_space= uint2korr(dir+2); + } + if (res->length < length) + goto crashed; /* Wrong bitmap information */ + } + } + res->dir= dir; + res->data= res->buff + uint2korr(dir); + } + DBUG_RETURN(0); + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_RETURN(1); +} + + +/* + Write tail of non-blob-data or blob + + SYNOPSIS + write_tail() + info Maria handler + block Block to tail page + row_part Data to write to page + length Length of data + + NOTES + block->page_count is updated to the directory offset for the tail + so that we can store the position in the row extent information + + RETURN + 0 ok + block->page_count is set to point (dir entry + TAIL_BIT) + + 1 error; In this case my_errno is set to the error +*/ + +static my_bool write_tail(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *row_part, uint length) +{ + MARIA_SHARE *share= share= info->s; + uint block_size= share->block_size, empty_space; + struct st_row_pos_info row_pos; + my_off_t position; + DBUG_ENTER("write_tail"); + DBUG_PRINT("enter", ("page: %lu length: %u", + (ulong) block->page, length)); + + info->keybuff_used= 1; + if (get_head_or_tail_page(info, block, info->keyread_buff, length, + TAIL_PAGE, &row_pos)) + DBUG_RETURN(1); + + memcpy(row_pos.data, row_part, length); + int2store(row_pos.dir + 2, length); + empty_space= row_pos.empty_space - length; + int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space); + block->page_count= row_pos.offset + TAIL_BIT; + /* + If there is less directory entries free than number of possible tails + we can write for a row, we mark the page full to ensure that we don't + during _ma_bitmap_find_place() allocate more entires on the tail page + than it can hold + */ + block->empty_space= ((uint) ((uchar*) row_pos.buff)[DIR_ENTRY_OFFSET] <= + MAX_ROWS_PER_PAGE - 1 - info->s->base.blobs ? + empty_space : 0); + block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + + /* Increase data file size, if extended */ + position= (my_off_t) block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + DBUG_RETURN(key_cache_write(share->key_cache, + info->dfile, position, 0, + row_pos.buff, block_size, block_size, 1)); +} + + +/* + Write full pages + + SYNOPSIS + write_full_pages() + info Maria handler + block Where to write data + data Data to write + length Length of data + +*/ + +static my_bool write_full_pages(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *data, ulong length) +{ + my_off_t page; + MARIA_SHARE *share= share= info->s; + uint block_size= share->block_size; + uint data_size= FULL_PAGE_SIZE(block_size); + byte *buff= info->keyread_buff; + uint page_count; + my_off_t position; + DBUG_ENTER("write_full_pages"); + DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu", + length, (ulong) block->page, block->page_count)); + + info->keybuff_used= 1; + page= block->page; + page_count= block->page_count; + + position= (my_off_t) (page + page_count) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + + /* Increase data file size, if extended */ + + for (; length; data+= data_size) + { + uint copy_length; + if (!page_count--) + { + block++; + page= block->page; + page_count= block->page_count - 1; + DBUG_PRINT("info", ("page: %lu page_count: %lu", + (ulong) block->page, block->page_count)); + + position= (page + page_count + 1) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + } + bzero(buff, LSN_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) BLOB_PAGE; + copy_length= min(data_size, length); + memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length); + length-= copy_length; + + if (key_cache_write(share->key_cache, + info->dfile, (my_off_t) page * block_size, 0, + buff, block_size, block_size, 1)) + DBUG_RETURN(1); + page++; + block->used= BLOCKUSED_USED; + } + DBUG_RETURN(0); +} + + + + +/* + Store packed extent data + + SYNOPSIS + store_extent_info() + to Store first packed data here + row_extents_second_part Store rest here + first_block First block to store + count Number of blocks + + NOTES + We don't have to store the position for the head block +*/ + +static void store_extent_info(byte *to, + byte *row_extents_second_part, + MARIA_BITMAP_BLOCK *first_block, + uint count) +{ + MARIA_BITMAP_BLOCK *block, *end_block; + uint copy_length; + my_bool first_found= 0; + + for (block= first_block, end_block= first_block+count ; + block < end_block; block++) + { + /* The following is only false for marker blocks */ + if (likely(block->used)) + { + int5store(to, block->page); + int2store(to + 5, block->page_count); + to+= ROW_EXTENT_SIZE; + if (!first_found) + { + first_found= 1; + to= row_extents_second_part; + } + } + } + copy_length= (count -1) * ROW_EXTENT_SIZE; + /* + In some unlikely cases we have allocated to many blocks. Clear this + data. + */ + bzero(to, (my_size_t) (row_extents_second_part + copy_length - to)); +} + +/* + Write a record to a (set of) pages +*/ + +static my_bool write_block_record(MARIA_HA *info, const byte *record, + MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *bitmap_blocks, + struct st_row_pos_info *row_pos) +{ + byte *data, *end_of_data, *tmp_data_used, *tmp_data; + byte *row_extents_first_part, *row_extents_second_part; + byte *field_length_data; + byte *page_buff; + MARIA_BITMAP_BLOCK *block, *head_block; + MARIA_SHARE *share; + MARIA_COLUMNDEF *rec, *end_field; + uint block_size, flag; + ulong *blob_lengths; + my_off_t position; + my_bool row_extents_in_use; + DBUG_ENTER("write_block_record"); + + LINT_INIT(row_extents_first_part); + LINT_INIT(row_extents_second_part); + + share= info->s; + head_block= bitmap_blocks->block; + block_size= share->block_size; + + info->cur_row.lastpos= ma_recordpos(head_block->page, row_pos->offset); + page_buff= row_pos->buff; + data= row_pos->data; + end_of_data= data + row_pos->length; + + /* Write header */ + flag= share->base.default_row_flag; + row_extents_in_use= 0; + if (unlikely(row->total_length > row_pos->length)) + { + /* Need extent */ + if (bitmap_blocks->count <= 1) + goto crashed; /* Wrong in bitmap */ + flag|= ROW_FLAG_EXTENTS; + row_extents_in_use= 1; + } + /* For now we have only a minimum header */ + *data++= (uchar) flag; + if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED)) + *data++= (uchar) (share->base.null_bytes - + share->base.original_null_bytes); + if (row_extents_in_use) + { + /* Store first extent in header */ + store_key_length_inc(data, bitmap_blocks->count - 1); + row_extents_first_part= data; + data+= ROW_EXTENT_SIZE; + } + if (share->base.pack_fields) + store_key_length_inc(data, row->field_lengths_length); + if (share->calc_checksum) + *(data++)= (byte) info->cur_row.checksum; + memcpy(data, record, share->base.null_bytes); + data+= share->base.null_bytes; + memcpy(data, row->empty_bits, share->base.pack_bytes); + data+= share->base.pack_bytes; + + /* + Allocate a buffer of rest of data (except blobs) + + To avoid double copying of data, we copy as many columns that fits into + the page. The rest goes into info->packed_row. + + Using an extra buffer, instead of doing continous writes to different + pages, uses less code and we don't need to have to do a complex call + for every data segment we want to store. + */ + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + row->head_length)) + DBUG_RETURN(1); + + tmp_data_used= 0; /* Either 0 or last used byte in 'data' */ + tmp_data= data; + + if (row_extents_in_use) + { + uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE; + if (!tmp_data_used && tmp_data + copy_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + row_extents_second_part= tmp_data; + /* + We will copy the extents here when we have figured out the tail + positions. + */ + tmp_data+= copy_length; + } + + /* Copy fields that has fixed lengths (primary key etc) */ + for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields; + rec < end_field; rec++) + { + if (!tmp_data_used && tmp_data + rec->length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy(tmp_data, record + rec->offset, rec->length); + tmp_data+= rec->length; + } + + /* Copy length of data for variable length fields */ + if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + field_length_data= row->field_lengths; + memcpy(tmp_data, field_length_data, row->field_lengths_length); + tmp_data+= row->field_lengths_length; + + /* Copy variable length fields and fields with null/zero */ + for (end_field= share->rec + share->base.fields - share->base.blobs; + rec < end_field ; + rec++) + { + const byte *field_pos; + ulong length; + if ((record[rec->null_pos] & rec->null_bit) || + (row->empty_bits[rec->empty_pos] & rec->empty_bit)) + continue; + + field_pos= record + rec->offset; + switch ((enum en_fieldtype) rec->type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + length= rec->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + /* Char that is space filled */ + if (rec->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } + break; + case FIELD_VARCHAR: + if (rec->length <= 256) + { + length= (uint) (uchar) *field_length_data++; + field_pos++; /* Skip length byte */ + } + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + field_pos+= 2; + } + break; + default: /* Wrong data */ + DBUG_ASSERT(0); + break; + } + if (!tmp_data_used && tmp_data + length > end_of_data) + { + /* Data didn't fit in page; Change to use tmp buffer */ + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy((char*) tmp_data, (char*) field_pos, length); + tmp_data+= length; + } + + block= head_block + head_block->sub_blocks; /* Point to first blob data */ + + end_field= rec + share->base.blobs; + blob_lengths= row->blob_lengths; + if (!tmp_data_used) + { + /* Still room on page; Copy as many blobs we can into this page */ + data= tmp_data; + for (; rec < end_field && *blob_lengths < (ulong) (end_of_data - data); + rec++, blob_lengths++) + { + byte *tmp_pos; + uint length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte*) &tmp_pos, record + rec->offset + length, + sizeof(char*)); + memcpy(data, tmp_pos, *blob_lengths); + data+= *blob_lengths; + /* Skip over tail page that was to be used to store blob */ + block++; + bitmap_blocks->tail_page_skipped= 1; + } + if (head_block->sub_blocks > 1) + { + /* We have allocated pages that where not used */ + bitmap_blocks->page_skipped= 1; + } + } + else + data= tmp_data_used; /* Get last used on page */ + + { + /* Update page directory */ + uint length= (uint) (data - row_pos->data); + DBUG_PRINT("info", ("head length: %u", length)); + if (length < info->s->base.min_row_length) + length= info->s->base.min_row_length; + + int2store(row_pos->dir + 2, length); + /* update empty space at start of block */ + row_pos->empty_space-= length; + int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space); + /* Mark in bitmaps how the current page was actually used */ + head_block->empty_space= row_pos->empty_space; + if (page_buff[DIR_ENTRY_OFFSET] == (char) MAX_ROWS_PER_PAGE) + head_block->empty_space= 0; /* Page is full */ + head_block->used= BLOCKUSED_USED; + } + + /* + Now we have to write tail pages, as we need to store the position + to them in the row extent header. + + We first write out all blob tails, to be able to store them in + the current page or 'tmp_data'. + + Then we write the tail of the non-blob fields (The position to the + tail page is stored either in row header, the extents in the head + page or in the first full page of the non-blob data. It's never in + the tail page of the non-blob data) + */ + + if (row_extents_in_use) + { + if (rec != end_field) /* If blob fields */ + { + MARIA_COLUMNDEF *save_rec= rec; + MARIA_BITMAP_BLOCK *save_block= block; + MARIA_BITMAP_BLOCK *end_block; + ulong *save_blob_lengths= blob_lengths; + + for (; rec < end_field; rec++, blob_lengths++) + { + byte *blob_pos; + if (!*blob_lengths) /* Null or "" */ + continue; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + { + uint length; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte *) &blob_pos, record + rec->offset + length, + sizeof(char*)); + length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */ + if (write_tail(info, block + block->sub_blocks-1, + blob_pos + *blob_lengths - length, + length)) + goto disk_err; + } + for (end_block= block + block->sub_blocks; block < end_block; block++) + { + /* + Set only a bit, to not cause bitmap code to belive a block is full + when there is still a lot of entries in it + */ + block->used|= BLOCKUSED_USED; + } + } + rec= save_rec; + block= save_block; + blob_lengths= save_blob_lengths; + } + + if (tmp_data_used) /* non blob data overflows */ + { + MARIA_BITMAP_BLOCK *cur_block, *end_block; + MARIA_BITMAP_BLOCK *head_tail_block= 0; + ulong length; + ulong data_length= (tmp_data - info->rec_buff); + +#ifdef SANITY_CHECK + if (cur_block->sub_blocks == 1) + goto crashed; /* no reserved full or tails */ +#endif + + /* + Find out where to write tail for non-blob fields. + + Problem here is that the bitmap code may have allocated more + space than we need. We have to handle the following cases: + + - Bitmap code allocated a tail page we don't need. + - The last full page allocated needs to be changed to a tail page + (Because we put more data than we thought on the head page) + + The reserved pages in bitmap_blocks for the main page has one of + the following allocations: + - Full pages, with following blocks: + # * full pages + empty page ; To be used if we change last full to tail page. This + has 'count' = 0. + tail page (optional, if last full page was part full) + - One tail page + */ + + cur_block= head_block + 1; + end_block= head_block + head_block->sub_blocks; + while (data_length >= (length= (cur_block->page_count * + FULL_PAGE_SIZE(block_size)))) + { +#ifdef SANITY_CHECK + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) + goto crashed; +#endif + data_length-= length; + (cur_block++)->used= BLOCKUSED_USED; + } + if (data_length) + { +#ifdef SANITY_CHECK + if ((cur_block == end_block)) + goto crashed; +#endif + if (cur_block->used & BLOCKUSED_TAIL) + { + DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size)); + /* tail written to full tail page */ + cur_block->used= BLOCKUSED_USED; + head_tail_block= cur_block; + } + else if (data_length > length - MAX_TAIL_SIZE(block_size)) + { + /* tail written to full page */ + cur_block->used= BLOCKUSED_USED; + if ((cur_block != end_block - 1) && + (end_block[-1].used & BLOCKUSED_TAIL)) + bitmap_blocks->tail_page_skipped= 1; + } + else + { + /* + cur_block is a full block, followed by an empty and optional + tail block. Change cur_block to a tail block or split it + into full blocks and tail blocks. + */ + DBUG_ASSERT(cur_block[1].page_count == 0); + if (cur_block->page_count == 1) + { + /* convert full block to tail block */ + cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + head_tail_block= cur_block; + } + else + { + DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size)); + DBUG_PRINT("info", ("Splitting blocks into full and tail")); + cur_block[1].page= (cur_block->page + cur_block->page_count - 1); + cur_block[1].page_count= 1; + cur_block[1].used= 1; + cur_block->page_count--; + cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + head_tail_block= cur_block + 1; + } + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + } + else + { + /* Must be an empty or tail page */ + DBUG_ASSERT(cur_block->page_count == 0 || + cur_block->used & BLOCKUSED_TAIL); + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + + /* + Write all extents into page or tmp_buff + + Note that we still don't have a correct position for the tail + of the non-blob fields. + */ + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + if (head_tail_block) + { + ulong data_length= (tmp_data - info->rec_buff); + uint length; + byte *extent_data; + + length= (uint) (data_length % FULL_PAGE_SIZE(block_size)); + if (write_tail(info, head_tail_block, data + data_length - length, + length)) + goto disk_err; + tmp_data-= length; /* Remove the tail */ + + /* Store the tail position for the non-blob fields */ + if (head_tail_block == head_block + 1) + extent_data= row_extents_first_part; + else + extent_data= row_extents_second_part + + ((head_tail_block - head_block) - 2) * ROW_EXTENT_SIZE; + int5store(extent_data, head_tail_block->page); + int2store(extent_data + 5, head_tail_block->page_count); + } + } + else + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + } + /* Increase data file size, if extended */ + position= (my_off_t) head_block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + if (key_cache_write(share->key_cache, + info->dfile, position, 0, + page_buff, share->block_size, share->block_size, 1)) + goto disk_err; + + if (tmp_data_used) + { + /* Write data stored in info->rec_buff to pages */ + DBUG_ASSERT(bitmap_blocks->count != 0); + if (write_full_pages(info, bitmap_blocks->block + 1, info->rec_buff, + (ulong) (tmp_data - info->rec_buff))) + goto disk_err; + } + + /* Write rest of blobs (data, but no tails as they are already written) */ + for (; rec < end_field; rec++, blob_lengths++) + { + byte *blob_pos; + uint length; + ulong blob_length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte*) &blob_pos, record + rec->offset + length, + sizeof(char*)); + /* remove tail part */ + blob_length= *blob_lengths; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + + if (write_full_pages(info, block, blob_pos, blob_length)) + goto disk_err; + block+= block->sub_blocks; + } + /* Release not used space in used pages */ + if (_ma_bitmap_release_unused(info, bitmap_blocks)) + goto disk_err; + DBUG_RETURN(0); + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ +disk_err: + /* Something was wrong with data on record */ + DBUG_RETURN(1); +} + + +/* + Write a record (to get the row id for it) + + SYNOPSIS + _ma_write_init_block_record() + info Maria handler + record Record to write + + NOTES + This is done BEFORE we write the keys to the row! + + RETURN + HA_OFFSET_ERROR Something went wrong + # Rowid for row +*/ + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + struct st_row_pos_info row_pos; + DBUG_ENTER("_ma_write_init_block_record"); + + calc_record_size(info, record, &info->cur_row); + if (_ma_bitmap_find_place(info, &info->cur_row, blocks)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reding bitmap */ + if (get_head_or_tail_page(info, blocks->block, info->buff, + info->s->base.min_row_length, HEAD_PAGE, &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); + info->cur_row.lastpos= ma_recordpos(blocks->block->page, row_pos.offset); + if (info->s->calc_checksum) + info->cur_row.checksum= (info->s->calc_checksum)(info,record); + if (write_block_record(info, record, &info->cur_row, blocks, &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reading bitmap */ + DBUG_PRINT("exit", ("Rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(info->cur_row.lastpos); +} + + +/* + Dummy function for (*info->s->write_record)() + + Nothing to do here, as we already wrote the record in + _ma_write_init_block_record() +*/ + +my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused)) +) +{ + return 0; /* Row already written */ +} + + +/* + Remove row written by _ma_write_block_record + + SYNOPSIS + _ma_abort_write_block_record() + info Maria handler + + INFORMATION + This is called in case we got a duplicate unique key while + writing keys. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_write_abort_block_record(MARIA_HA *info) +{ + my_bool res= 0; + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + MARIA_BITMAP_BLOCK *block, *end; + DBUG_ENTER("_ma_abort_write_block_record"); + + if (delete_head_or_tail(info, + ma_recordpos_to_page(info->cur_row.lastpos), + ma_recordpos_to_offset(info->cur_row.lastpos), 1)) + res= 1; + for (block= blocks->block + 1, end= block + blocks->count - 1; block < end; + block++) + { + if (block->used & BLOCKUSED_TAIL) + { + /* + block->page_count is set to the tail directory entry number in + write_block_record() + */ + if (delete_head_or_tail(info, block->page, block->page_count & ~TAIL_BIT, + 0)) + res= 1; + } + else + { + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + if (_ma_reset_full_page_bits(info, &info->s->bitmap, block->page, + block->page_count)) + res= 1; + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + } + } + DBUG_RETURN(res); +} + + +/* + Update a record + + NOTES + For the moment, we assume that info->curr_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows split into many extents. +*/ + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos, + const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + byte *buff; + MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row; + uint rownr, org_empty_size, head_length; + uint block_size= info->s->block_size; + byte *dir; + ulonglong page; + struct st_row_pos_info row_pos; + DBUG_ENTER("_ma_update_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + calc_record_size(info, record, new_row); + page= ma_recordpos_to_page(record_pos); + + if (!(buff= key_cache_read(info->s->key_cache, + info->dfile, (my_off_t) page * block_size, 0, + info->buff, block_size, block_size, 0))) + DBUG_RETURN(1); + org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET); + rownr= ma_recordpos_to_offset(record_pos); + dir= (buff + block_size - DIR_ENTRY_SIZE * rownr - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + + if ((org_empty_size + cur_row->head_length) >= new_row->total_length) + { + uint empty, offset, length; + MARIA_BITMAP_BLOCK block; + + /* + We can fit the new row in the same page as the original head part + of the row + */ + block.org_bitmap_value= _ma_free_size_to_head_pattern(&info->s->bitmap, + org_empty_size); + offset= uint2korr(dir); + length= uint2korr(dir + 2); + empty= 0; + if (new_row->total_length > length) + { + /* See if there is empty space after */ + if (rownr != (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET] - 1) + empty= empty_pos_after_row(dir) - (offset + length); + if (new_row->total_length > length + empty) + { + compact_page(buff, info->s->block_size, rownr); + org_empty_size= 0; + length= uint2korr(dir + 2); + } + } + row_pos.buff= buff; + row_pos.offset= rownr; + row_pos.empty_space= org_empty_size + length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= length + empty; + blocks->block= █ + blocks->count= 1; + block.page= page; + block.sub_blocks= 1; + block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP; + block.empty_space= row_pos.empty_space; + /* Update cur_row, if someone calls update at once again */ + cur_row->head_length= new_row->total_length; + if (_ma_bitmap_free_full_pages(info, cur_row->extents, + cur_row->extents_count)) + DBUG_RETURN(1); + DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos)); + } + /* + Allocate all size in block for record + QQ: Need to improve this to do compact if we can fit one more blob into + the head page + */ + head_length= uint2korr(dir + 2); + if (buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED && org_empty_size && + (head_length < new_row->head_length || + (new_row->total_length <= head_length && + org_empty_size + head_length >= new_row->total_length))) + { + compact_page(buff, info->s->block_size, rownr); + org_empty_size= 0; + head_length= uint2korr(dir + 2); + } + + /* Delete old row */ + if (delete_tails(info, cur_row->tail_positions)) + DBUG_RETURN(1); + if (_ma_bitmap_free_full_pages(info, cur_row->extents, + cur_row->extents_count)) + DBUG_RETURN(1); + if (_ma_bitmap_find_new_place(info, new_row, page, head_length, blocks)) + DBUG_RETURN(1); + + row_pos.buff= buff; + row_pos.offset= rownr; + row_pos.empty_space= org_empty_size + head_length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= head_length; + DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos)); +} + + +/* + Delete a head a tail part + + SYNOPSIS + delete_head_or_tail() + info Maria handler + page Page (not file offset!) on which the row is + head 1 if this is a head page + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head) +{ + MARIA_SHARE *share= info->s; + uint number_of_records, empty_space, length; + uint block_size= share->block_size; + byte *buff, *dir; + my_off_t position; + DBUG_ENTER("delete_head_or_tail"); + + info->keybuff_used= 1; + if (!(buff= key_cache_read(share->key_cache, + info->dfile, page * block_size, 0, + info->keyread_buff, + block_size, block_size, 0))) + DBUG_RETURN(1); + + number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > MAX_ROWS_PER_PAGE || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / (DIR_ENTRY_SIZE + MIN_TAIL_SIZE))) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + DBUG_RETURN(1); + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_ENTRY_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + position= (my_off_t) page * block_size; + if (key_cache_write(share->key_cache, + info->dfile, position, 0, + buff, block_size, block_size, 1)) + DBUG_RETURN(1); + } + else + { + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); + } + DBUG_PRINT("info", ("empty_space: %u", empty_space)); + DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); +} + + +/* + delete all tails + + SYNOPSIS + delete_tails() + info Handler + tails Pointer to vector of tail positions, ending with 0 + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails) +{ + my_bool res= 0; + DBUG_ENTER("delete_tails"); + for (; *tails; tails++) + { + if (delete_head_or_tail(info, + ma_recordpos_to_page(*tails), + ma_recordpos_to_offset(*tails), 0)) + res= 1; + } + DBUG_RETURN(res); +} + + +/* + Delete a record + + NOTES + For the moment, we assume that info->cur_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows with many splits. +*/ + +my_bool _ma_delete_block_record(MARIA_HA *info) +{ + DBUG_ENTER("_ma_delete_block_record"); + if (delete_head_or_tail(info, + ma_recordpos_to_page(info->cur_row.lastpos), + ma_recordpos_to_offset(info->cur_row.lastpos), + 1) || + delete_tails(info, info->cur_row.tail_positions)) + DBUG_RETURN(1); + DBUG_RETURN(_ma_bitmap_free_full_pages(info, info->cur_row.extents, + info->cur_row.extents_count)); +} + + +/**************************************************************************** + Reading of records +****************************************************************************/ + +/* + Read position to record from record directory at end of page + + SYNOPSIS + get_record_position() + buff page buffer + block_size block size for page + record_number Record number in index + end_of_data pointer to end of data for record + + RETURN + 0 Error in data + # Pointer to start of record. + In this case *end_of_data is set. +*/ + +static byte *get_record_position(byte *buff, uint block_size, + uint record_number, byte **end_of_data) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; + byte *dir; + byte *data; + uint offset, length; + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > MAX_ROWS_PER_PAGE || + record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) / + (DIR_ENTRY_SIZE + MIN_TAIL_SIZE))) + { + DBUG_PRINT("error", + ("Wrong row number: record_number: %u number_of_records: %u", + record_number, number_of_records)); + return 0; + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + offset= uint2korr(dir); + length= uint2korr(dir + 2); +#ifdef SANITY_CHECKS + if (offset < PAGE_HEADER_SIZE || + offset + length > (block_size - + number_of_records * DIR_ENTRY_SIZE - + PAGE_SUFFIX_SIZE)) + { + DBUG_PRINT("error", + ("Wrong row position: record_number: %u offset: %u " + "length: %u number_of_records: %u", + record_number, offset, length, number_of_records)); + return 0; + } +#endif + data= buff + offset; + *end_of_data= data + length; + return data; +} + + +/* + Init extent + + NOTES + extent is a cursor over which pages to read +*/ + +static void init_extent(MARIA_EXTENT_CURSOR *extent, byte *extent_info, + uint extents, MARIA_RECORD_POS *tail_positions) +{ + uint page_count; + extent->extent= extent_info; + extent->extent_count= extents; + extent->page= uint5korr(extent_info); /* First extent */ + page_count= uint2korr(extent_info+5); + extent->page_count= page_count & ~TAIL_BIT; + extent->tail= page_count & TAIL_BIT; + extent->tail_positions= tail_positions; +} + + +/* + Read next extent + + SYNOPSIS + read_next_extent() + info Maria handler + extent Pointer to current extent (this is updated to point + to next) + end_of_data Pointer to end of data in read block (out) + + NOTES + New block is read into info->buff + + RETURN + 0 Error; my_errno is set + # Pointer to start of data in read block + In this case end_of_data is updated to point to end of data. +*/ + +static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, + byte **end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *buff, *data; + DBUG_ENTER("read_next_extent"); + + if (!extent->page_count) + { + uint page_count; + if (!--extent->extent_count) + goto crashed; + extent->extent+= ROW_EXTENT_SIZE; + extent->page= uint5korr(extent->extent); + page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); + extent->tail= page_count & TAIL_BIT; + extent->page_count= (page_count & ~TAIL_BIT); + extent->first_extent= 0; + DBUG_PRINT("info",("New extent. Page: %lu page_count: %u tail_flag: %d", + (ulong) extent->page, extent->page_count, + extent->tail != 0)); + } + + if (info->cur_row.empty_bits != info->cur_row.empty_bits_buffer) + { + /* + First read of extents: Move data from info->buff to + internals buffers. + */ + memcpy(info->cur_row.empty_bits_buffer, info->cur_row.empty_bits, + share->base.pack_bytes); + info->cur_row.empty_bits= info->cur_row.empty_bits_buffer; + } + + if (!(buff= key_cache_read(share->key_cache, + info->dfile, extent->page * share->block_size, 0, + info->buff, + share->block_size, share->block_size, 0))) + { + /* check if we tried to read over end of file (ie: bad data in record) */ + if ((extent->page + 1) * share->block_size > info->state->data_file_length) + goto crashed; + DBUG_RETURN(0); + } + if (!extent->tail) + { + /* Full data page */ + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + extent->page++; /* point to next page */ + extent->page_count--; + *end_of_data= buff + share->block_size; + info->cur_row.full_page_count++; /* For maria_chk */ + DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE); + } + /* Found tail. page_count is in this case the position in the tail page */ + + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + *(extent->tail_positions++)= ma_recordpos(extent->page, + extent->page_count); + info->cur_row.tail_count++; /* For maria_chk */ + + if (!(data= get_record_position(buff, share->block_size, + extent->page_count, + end_of_data))) + goto crashed; + extent->data_start= data; + extent->page_count= 0; /* No more data in extent */ + DBUG_RETURN(data); + + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("wrong extent information")); + DBUG_RETURN(0); +} + + +/* + Read data that may be split over many blocks + + SYNOPSIS + read_long_data() + info Maria handler + to Store result string here (this is allocated) + extent Pointer to current extent position + data Current position in buffer + end_of_data End of data in buffer + + NOTES + When we have to read a new buffer, it's read into info->buff + + This loop is implemented by goto's instead of a for() loop as + the code is notable smaller and faster this way (and it's not nice + to jump into a for loop() or into a 'then' clause) + + RETURN + 0 ok + 1 error +*/ + +static my_bool read_long_data(MARIA_HA *info, byte *to, ulong length, + MARIA_EXTENT_CURSOR *extent, + byte **data, byte **end_of_data) +{ + DBUG_ENTER("read_long_data"); + DBUG_PRINT("enter", ("length: %lu", length)); + DBUG_ASSERT(*data <= *end_of_data); + + for(;;) + { + uint left_length; + left_length= (uint) (*end_of_data - *data); + if (likely(left_length >= length)) + { + memcpy(to, *data, length); + (*data)+= length; + DBUG_RETURN(0); + } + memcpy(to, *data, left_length); + to+= left_length; + length-= left_length; + if (!(*data= read_next_extent(info, extent, end_of_data))) + break; + } + DBUG_RETURN(1); +} + + +/* + Read a record from page (helper function for _ma_read_block_record()) + + SYNOPSIS + _ma_read_block_record2() + info Maria handler + record Store record here + data Start of head data for row + end_of_data End of data for row + + NOTES + The head page is already read by caller + Following data is update in info->cur_row: + + cur_row.head_length is set to size of entry in head block + cur_row.tail_positions is set to point to all tail blocks + cur_row.extents points to extents data + cur_row.extents_counts contains number of extents + cur_row.empty_bits points to empty bits part in read record + cur_row.field_lengths contains packed length of all fields + + RETURN + 0 ok + # Error code +*/ + +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data, *blob_buffer, *start_of_data; + uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths; + my_bool found_blob= 0; + MARIA_EXTENT_CURSOR extent; + MARIA_COLUMNDEF *rec, *end_field; + DBUG_ENTER("_ma_read_block_record2"); + + LINT_INIT(field_lengths); + LINT_INIT(field_length_data); + LINT_INIT(blob_buffer); + + start_of_data= data; + flag= (uint) (uchar) data[0]; + cur_null_bytes= share->base.original_null_bytes; + null_bytes= share->base.null_bytes; + info->cur_row.head_length= (uint) (end_of_data - data); + info->cur_row.full_page_count= info->cur_row.tail_count= 0; + + /* Skip trans header (for now, until we have MVCC csupport) */ + data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)]; + if (flag & ROW_FLAG_NULLS_EXTENDED) + cur_null_bytes+= data[-1]; + + row_extents= 0; + if (flag & ROW_FLAG_EXTENTS) + { + uint row_extent_size; + /* + Record is split over many data pages. + Get number of extents and first extent + */ + get_key_length(row_extents, data); + info->cur_row.extents_count= row_extents; + row_extent_size= row_extents * ROW_EXTENT_SIZE; + if (info->cur_row.extents_buffer_length < row_extent_size && + _ma_alloc_buffer(&info->cur_row.extents, + &info->cur_row.extents_buffer_length, + row_extent_size)) + DBUG_RETURN(my_errno); + memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE); + data+= ROW_EXTENT_SIZE; + init_extent(&extent, info->cur_row.extents, row_extents, + info->cur_row.tail_positions); + } + else + { + info->cur_row.extents_count= 0; + (*info->cur_row.tail_positions)= 0; + extent.page_count= 0; + extent.extent_count= 1; + } + extent.first_extent= 1; + + if (share->base.max_field_lengths) + { + get_key_length(field_lengths, data); +#ifdef SANITY_CHECKS + if (field_lengths > share->base.max_field_lengths) + goto err; +#endif + } + + if (share->calc_checksum) + info->cur_row.checksum= (uint) (uchar) *data++; + /* data now points on null bits */ + memcpy(record, data, cur_null_bytes); + if (unlikely(cur_null_bytes != null_bytes)) + { + /* + This only happens if we have added more NULL columns with + ALTER TABLE and are fetching an old, not yet modified old row + */ + bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes)); + } + data+= null_bytes; + info->cur_row.empty_bits= (byte*) data; /* Pointer to empty bitmask */ + data+= share->base.pack_bytes; + + /* TODO: Use field offsets, instead of just skipping them */ + data+= share->base.field_offsets * FIELD_OFFSET_SIZE; + + /* + Read row extents (note that first extent was already read into + info->cur_row.extents above) + */ + if (row_extents) + { + if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, + (row_extents - 1) * ROW_EXTENT_SIZE, + &extent, &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* + Data now points to start of fixed length field data that can't be null + or 'empty'. Note that these fields can't be split over blocks + */ + for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields; + rec < end_field; rec++) + { + uint rec_length= rec->length; + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(record + rec->offset, data, rec_length); + data+= rec_length; + } + + /* Read array of field lengths. This may be stored in several extents */ + if (share->base.max_field_lengths) + { + field_length_data= info->cur_row.field_lengths; + if (read_long_data(info, field_length_data, field_lengths, &extent, + &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* Read variable length data. Each of these may be split over many extents */ + for (end_field= share->rec + share->base.fields; rec < end_field; rec++) + { + enum en_fieldtype type= (enum en_fieldtype) rec->type; + byte *field_pos= record + rec->offset; + /* First check if field is present in record */ + if (record[rec->null_pos] & rec->null_bit) + continue; + else if (info->cur_row.empty_bits[rec->empty_pos] & rec->empty_bit) + { + if (type == FIELD_SKIP_ENDSPACE) + bfill(record + rec->offset, rec->length, ' '); + else + bzero(record + rec->offset, rec->fill_length); + continue; + } + switch (type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(field_pos, data, rec->length); + data+= rec->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + /* Char that is space filled */ + uint length; + if (rec->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } +#ifdef SANITY_CHECKS + if (length > rec->length) + goto err; +#endif + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + bfill(field_pos + length, rec->length - length, ' '); + break; + } + case FIELD_VARCHAR: + { + ulong length; + if (rec->length <= 256) + { + length= (uint) (uchar) (*field_pos++= *field_length_data++); + } + else + { + length= uint2korr(field_length_data); + field_pos[0]= field_length_data[0]; + field_pos[1]= field_length_data[1]; + field_pos+= 2; + field_length_data+= 2; + } + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + break; + } + case FIELD_BLOB: + { + uint size_length= rec->length - maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_length_data); + + if (!found_blob) + { + /* Calculate total length for all blobs */ + ulong blob_lengths= 0; + byte *length_data= field_length_data; + MARIA_COLUMNDEF *blob_field= rec; + + found_blob= 1; + for (; blob_field < end_field; blob_field++) + { + uint size_length; + if ((record[blob_field->null_pos] & blob_field->null_bit) || + (info->cur_row.empty_bits[blob_field->empty_pos] & + blob_field->empty_bit)) + continue; + size_length= blob_field->length - maria_portable_sizeof_char_ptr; + blob_lengths+= _ma_calc_blob_length(size_length, length_data); + length_data+= size_length; + } + DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths)); + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + blob_lengths)) + DBUG_RETURN(my_errno); + blob_buffer= info->rec_buff; + } + + memcpy(field_pos, field_length_data, size_length); + memcpy_fixed(field_pos + size_length, (byte *) & blob_buffer, + sizeof(char*)); + field_length_data+= size_length; + + /* + After we have read one extent, then each blob is in it's own extent + */ + if (extent.first_extent && (ulong) (end_of_data - data) < blob_length) + end_of_data= data; /* Force read of next extent */ + + if (read_long_data(info, blob_buffer, blob_length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + blob_buffer+= blob_length; + break; + } +#ifdef EXTRA_DEBUG + default: + DBUG_ASSERT(0); /* purecov: deadcode */ + goto err; +#endif + } + continue; + } + + if (row_extents) + { + DBUG_PRINT("info", ("Row read: page_count: %lu extent_count: %lu", + extent.page_count, extent.extent_count)); + *extent.tail_positions= 0; /* End marker */ + if (extent.page_count) + goto err; + if (extent.extent_count > 1) + if (check_if_zero(extent.extent, + (extent.extent_count-1) * ROW_EXTENT_SIZE)) + goto err; + } + else + { + DBUG_PRINT("info", ("Row read")); + if (data != end_of_data && (uint) (end_of_data - start_of_data) >= + info->s->base.min_row_length) + goto err; + } + + info->update|= HA_STATE_AKTIV; /* We have a aktive record */ + DBUG_RETURN(0); + +err: + /* Something was wrong with data on record */ + DBUG_PRINT("error", ("Found record with wrong data")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Read a record based on record position + + SYNOPSIS + _ma_read_block_record() + info Maria handler + record Store record here + record_pos Record position +*/ + +int _ma_read_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos) +{ + byte *data, *end_of_data, *buff; + my_off_t page; + uint offset; + uint block_size= info->s->block_size; + DBUG_ENTER("_ma_read_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + page= ma_recordpos_to_page(record_pos) * block_size; + offset= ma_recordpos_to_offset(record_pos); + + if (!(buff= key_cache_read(info->s->key_cache, + info->dfile, page, 0, info->buff, + block_size, block_size, 1))) + DBUG_RETURN(1); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE); + if (!(data= get_record_position(buff, block_size, offset, &end_of_data))) + { + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("Wrong directory entry in data block")); + DBUG_RETURN(1); + } + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); +} + + +/* compare unique constraint between stored rows */ + +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + byte *org_rec_buff, *old_record; + my_size_t org_rec_buff_size; + int error; + DBUG_ENTER("_ma_cmp_block_unique"); + + if (!(old_record= my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + org_rec_buff= info->rec_buff; + org_rec_buff_size= info->rec_buff_size; + if (info->s->base.blobs) + { + /* Force realloc of record buffer*/ + info->rec_buff= 0; + info->rec_buff_size= 0; + } + error= _ma_read_block_record(info, old_record, pos); + if (!error) + error= _ma_unique_comp(def, record, old_record, def->null_are_equal); + if (info->s->base.blobs) + { + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= org_rec_buff; + info->rec_buff_size= org_rec_buff_size; + } + DBUG_PRINT("exit", ("result: %d", error)); + my_afree(old_record); + DBUG_RETURN(error != 0); +} + + +/**************************************************************************** + Table scan +****************************************************************************/ + +/* + Allocate buffers for table scan + + SYNOPSIS + _ma_scan_init_block_record(MARIA_HA *info) + + IMPLEMENTATION + We allocate one buffer for the current bitmap and one buffer for the + current page +*/ + +my_bool _ma_scan_init_block_record(MARIA_HA *info) +{ + byte *ptr; + if (!(ptr= (byte *) my_malloc(info->s->block_size * 2, MYF(MY_WME)))) + return (1); + info->scan.bitmap_buff= ptr; + info->scan.page_buff= ptr + info->s->block_size; + info->scan.bitmap_end= info->scan.bitmap_buff + info->s->bitmap.total_size; + + /* Set scan variables to get _ma_scan_block() to start with reading bitmap */ + info->scan.number_of_rows= 0; + info->scan.bitmap_pos= info->scan.bitmap_end; + info->scan.bitmap_page= (ulong) - (long) info->s->bitmap.pages_covered; + /* + We have to flush bitmap as we will read the bitmap from the page cache + while scanning rows + */ + return _ma_flush_bitmap(info->s); +} + + +/* Free buffers allocated by _ma_scan_block_init() */ + +void _ma_scan_end_block_record(MARIA_HA *info) +{ + my_free(info->scan.bitmap_buff, MYF(0)); + info->scan.bitmap_buff= 0; +} + + +/* + Read next record while scanning table + + SYNOPSIS + _ma_scan_block_record() + info Maria handler + record Store found here + record_pos Value stored in info->cur_row.next_pos after last call + skip_deleted + + NOTES + - One must have called mi_scan() before this + - In this version, we don't actually need record_pos, we as easily + use a variable in info->scan + + IMPLEMENTATION + Current code uses a lot of goto's to separate the different kind of + states we may be in. This gives us a minimum of executed if's for + the normal cases. I tried several different ways to code this, but + the current one was in the end the most readable and fastest. + + RETURN + 0 ok + # Error code +*/ + +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos, + my_bool skip_deleted __attribute__ ((unused))) +{ + uint block_size; + my_off_t filepos; + DBUG_ENTER("_ma_scan_block_record"); + +restart_record_read: + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; +#ifdef SANITY_CHECKS + if (info->scan.dir < info->scan.dir_end) + goto err; +#endif + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ +#ifdef SANITY_CHECKS + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length) + goto err; +#endif + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + + /* Find next head page in current bitmap */ +restart_bitmap_scan: + block_size= info->s->block_size; + if (likely(info->scan.bitmap_pos < info->scan.bitmap_end)) + { + byte *data= info->scan.bitmap_pos; + longlong bits= info->scan.bits; + uint bit_pos= info->scan.bit_pos; + + do + { + while (likely(bits)) + { + uint pattern= bits & 7; + bits >>= 3; + bit_pos++; + if (pattern > 0 && pattern <= 4) + { + /* Found head page; Read it */ + ulong page; + info->scan.bitmap_pos= data; + info->scan.bits= bits; + info->scan.bit_pos= bit_pos; + page= (info->scan.bitmap_page + 1 + + (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1); + info->scan.row_base_page= ma_recordpos(page, 0); + if (!(key_cache_read(info->s->key_cache, + info->dfile, + (my_off_t) page * block_size, + 0, info->scan.page_buff, + block_size, block_size, 0))) + DBUG_RETURN(my_errno); + if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != + HEAD_PAGE) || + (info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_ENTRY_OFFSET]) == 0) + { + DBUG_PRINT("error", ("Wrong page header")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + } + info->scan.dir= (info->scan.page_buff + block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + record_pos= 0; + goto restart_record_read; + } + } + for (data+= 6; data < info->scan.bitmap_end; data+= 6) + { + bits= uint6korr(data); + if (bits && ((bits & LL(04444444444444444)) != LL(04444444444444444))) + break; + } + bit_pos= 0; + } while (data < info->scan.bitmap_end); + } + + /* Read next bitmap */ + info->scan.bitmap_page+= info->s->bitmap.pages_covered; + filepos= (my_off_t) info->scan.bitmap_page * block_size; + if (unlikely(filepos >= info->state->data_file_length)) + { + DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); + } + if (!(key_cache_read(info->s->key_cache, info->dfile, filepos, + 0, info->scan.bitmap_buff, block_size, block_size, 0))) + DBUG_RETURN(my_errno); + /* Skip scanning 'bits' in bitmap scan code */ + info->scan.bitmap_pos= info->scan.bitmap_buff - 6; + info->scan.bits= 0; + goto restart_bitmap_scan; + +err: + DBUG_PRINT("error", ("Wrong data on page")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Compare a row against a stored one + + NOTES + Not implemented, as block record is not supposed to be used in a shared + global environment +*/ + +my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused))) +{ + return 0; +} + + +#ifndef DBUG_OFF + +static void _ma_print_directory(byte *buff, uint block_size) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET], row= 0; + uint end_of_prev_row= PAGE_HEADER_SIZE; + byte *dir, *end; + + dir= buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"Directory dump (pos:length):\n"); + + for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++) + { + uint offset= uint2korr(end); + uint length= uint2korr(end+2); + fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0); + if (!(row % (80/12))) + fputc('\n', DBUG_FILE); + if (offset) + { + DBUG_ASSERT(offset >= end_of_prev_row); + end_of_prev_row= offset + length; + } + } + fputc('\n', DBUG_FILE); + fflush(DBUG_FILE); + DBUG_UNLOCK_FILE; +} +#endif /* DBUG_OFF */ + diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h new file mode 100644 index 00000000000..ec99dbfcae2 --- /dev/null +++ b/storage/maria/ma_blockrec.h @@ -0,0 +1,160 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block +*/ + +#define LSN_SIZE 7 +#define DIRCOUNT_SIZE 1 /* Stores number of rows on page */ +#define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */ +#define PAGE_TYPE_SIZE 1 +#define PAGE_SUFFIX_SIZE 0 /* Bytes for page suffix */ +#define PAGE_HEADER_SIZE (LSN_SIZE + DIRCOUNT_SIZE + EMPTY_SPACE_SIZE +\ + PAGE_TYPE_SIZE) +#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \ + PAGE_SUFFIX_SIZE) +#define BLOCK_RECORD_POINTER_SIZE 6 + +#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - PAGE_TYPE_SIZE) + +#define ROW_EXTENT_PAGE_SIZE 5 +#define ROW_EXTENT_COUNT_SIZE 2 +#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE) +#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */ +#define ELEMENTS_RESERVED_FOR_MAIN_PART 4 +#define EXTRA_LENGTH_FIELDS 3 + +#define FLAG_SIZE 1 +#define TRANSID_SIZE 6 +#define VERPTR_SIZE 7 +#define DIR_ENTRY_SIZE 4 +#define FIELD_OFFSET_SIZE 2 + +/* Minimum header size needed for a new row */ +#define BASE_ROW_HEADER_SIZE FLAG_SIZE +#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE + +#define PAGE_TYPE_MASK 127 +enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE }; + +#define PAGE_TYPE_OFFSET LSN_SIZE +#define DIR_ENTRY_OFFSET LSN_SIZE+PAGE_TYPE_SIZE +#define EMPTY_SPACE_OFFSET (DIR_ENTRY_OFFSET + DIRCOUNT_SIZE) + +#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */ + +/* Bits used for flag byte (one byte, first in record) */ +#define ROW_FLAG_TRANSID 1 +#define ROW_FLAG_VER_PTR 2 +#define ROW_FLAG_DELETE_TRANSID 4 +#define ROW_FLAG_NULLS_EXTENDED 8 +#define ROW_FLAG_EXTENTS 128 +#define ROW_FLAG_ALL (1+2+4+8+128) + +/* Variables that affects how data pages are utilized */ +#define MIN_TAIL_SIZE 32 + +/* Fixed part of Max possible header size; See table in ma_blockrec.c */ +#define MAX_FIXED_HEADER_SIZE (FLAG_SIZE + 3 + ROW_EXTENT_SIZE + 3) +#define TRANS_MAX_FIXED_HEADER_SIZE (MAX_FIXED_HEADER_SIZE + \ + FLAG_SIZE + TRANSID_SIZE + VERPTR_SIZE + \ + TRANSID_SIZE) + +/* We use 1 byte in record header to store number of directory entries */ +#define MAX_ROWS_PER_PAGE 255 + +/* Bits for MARIA_BITMAP_BLOCKS->used */ +#define BLOCKUSED_USED 1 +#define BLOCKUSED_USE_ORG_BITMAP 2 +#define BLOCKUSED_TAIL 4 + +/* defines that affects allocation (density) of data */ + +/* If we fill up a block to 75 %, don't create a new tail page for it */ +#define MAX_TAIL_SIZE(block_size) ((block_size) *3 / 4) + +/* Functions to convert MARIA_RECORD_POS to/from page:offset */ + +static inline MARIA_RECORD_POS ma_recordpos(ulonglong page, uint offset) +{ + return (MARIA_RECORD_POS) ((page << 8) | offset); +} + +static inline my_off_t ma_recordpos_to_page(MARIA_RECORD_POS record_pos) +{ + return record_pos >> 8; +} + +static inline my_off_t ma_recordpos_to_offset(MARIA_RECORD_POS record_pos) +{ + return record_pos & 255; +} + +/* ma_blockrec.c */ +void _ma_init_block_record_data(void); +my_bool _ma_once_init_block_row(MARIA_SHARE *share, File dfile); +my_bool _ma_once_end_block_row(MARIA_SHARE *share); +my_bool _ma_init_block_row(MARIA_HA *info); +void _ma_end_block_row(MARIA_HA *info); + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record); +my_bool _ma_delete_block_record(MARIA_HA *info); +int _ma_read_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos); +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data); +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS, my_bool); +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_scan_init_block_record(MARIA_HA *info); +void _ma_scan_end_block_record(MARIA_HA *info); + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record); +my_bool _ma_write_block_record(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_block_record(MARIA_HA *info); +my_bool _ma_compare_block_record(register MARIA_HA *info, + register const byte *record); + +/* ma_bitmap.c */ +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file); +my_bool _ma_bitmap_end(MARIA_SHARE *share); +my_bool _ma_flush_bitmap(MARIA_SHARE *share); +my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap, + ulonglong page); +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks); +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count); +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head, + uint empty_space); +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count); +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size); +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern); +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 69d863e6366..3fece3687e1 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -28,43 +28,43 @@ #include #endif #include "ma_rt_index.h" +#include "ma_blockrec.h" -#ifndef USE_RAID -#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) -#define my_raid_delete(A,B,C) my_delete(A,B) -#endif +/* Functions defined in this file */ - /* Functions defined in this file */ - -static int check_k_link(HA_CHECK *param, MARIA_HA *info,uint nr); +static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link); static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, - my_off_t page, uchar *buff, ha_rows *keys, + my_off_t page, byte *buff, ha_rows *keys, ha_checksum *key_checksum, uint level); static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); static int writekeys(HA_CHECK *param, MARIA_HA *info,byte *buff, my_off_t filepos); -static int sort_one_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, my_off_t pagepos, File new_file); -static int sort_key_read(MARIA_SORT_PARAM *sort_param,void *key); -static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param,void *key); +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key); +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key); static int sort_get_next_record(MARIA_SORT_PARAM *sort_param); -static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,const void *b); -static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, const void *a); -static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a); +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b); +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a); +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a); static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo, - uchar *key); + const byte *key); static int sort_insert_key(MARIA_SORT_PARAM *sort_param, reg1 SORT_KEY_BLOCKS *key_block, - uchar *key, my_off_t prev_block); + const byte *key, my_off_t prev_block); static int sort_delete_record(MARIA_SORT_PARAM *sort_param); /*static int _ma_flush_pending_blocks(HA_CHECK *param);*/ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length); static ha_checksum maria_byte_checksum(const byte *buf, uint length); static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); +static void restore_data_file_type(MARIA_SHARE *share); -void mariachk_init(HA_CHECK *param) +void maria_chk_init(HA_CHECK *param) { bzero((gptr) param,sizeof(*param)); param->opt_follow_links=1; @@ -182,7 +182,7 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag) else { param->record_checksum+=(ha_checksum) next_link; - next_link= _ma_rec_pos(info->s,(uchar*) buff+1); + next_link= _ma_rec_pos(info->s, buff+1); empty+=info->s->base.pack_reclength; } } @@ -223,18 +223,14 @@ wrong: /* Check delete links in index file */ -static int check_k_link(HA_CHECK *param, register MARIA_HA *info, uint nr) +static int check_k_link(HA_CHECK *param, register MARIA_HA *info, + my_off_t next_link) { - my_off_t next_link; - uint block_size=(nr+1)*MARIA_MIN_KEY_BLOCK_LENGTH; + uint block_size= info->s->block_size; ha_rows records; char llbuff[21],*buff; DBUG_ENTER("check_k_link"); - if (param->testflag & T_VERBOSE) - printf("block_size %4d:",block_size); - - next_link=info->s->state.key_del[nr]; records= (ha_rows) (info->state->key_file_length / block_size); while (next_link != HA_OFFSET_ERROR && records > 0) { @@ -243,12 +239,12 @@ static int check_k_link(HA_CHECK *param, register MARIA_HA *info, uint nr) if (param->testflag & T_VERBOSE) printf("%16s",llstr(next_link,llbuff)); if (next_link > info->state->key_file_length || - next_link & (info->s->blocksize-1)) + next_link & (info->s->block_size-1)) DBUG_RETURN(1); if (!(buff=key_cache_read(info->s->key_cache, info->s->kfile, next_link, DFLT_INIT_HITS, (byte*) info->buff, - maria_block_size, block_size, 1))) + block_size, block_size, 1))) DBUG_RETURN(1); next_link=mi_sizekorr(buff); records--; @@ -274,9 +270,10 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) char buff[22],buff2[22]; DBUG_ENTER("maria_chk_size"); - if (!(param->testflag & T_SILENT)) puts("- check file-size"); + if (!(param->testflag & T_SILENT)) + puts("- check file-size"); - /* The following is needed if called externally (not from mariachk) */ + /* The following is needed if called externally (not from maria_chk) */ flush_key_blocks(info->s->key_cache, info->s->kfile, FLUSH_FORCE_WRITE); @@ -291,7 +288,7 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) "Size of indexfile is: %-8s Should be: %s", llstr(size,buff), llstr(skr,buff2)); } - else + else if (!(param->testflag & T_VERY_SILENT)) _ma_check_print_warning(param, "Size of indexfile is: %-8s Should be: %s", llstr(size,buff), llstr(skr,buff2)); @@ -341,7 +338,7 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) } /* maria_chk_size */ - /* Check keys */ +/* Check keys */ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) { @@ -359,23 +356,22 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) puts("- check key delete-chain"); param->key_file_blocks=info->s->base.keystart; - for (key=0 ; key < info->s->state.header.max_block_size_index ; key++) - if (check_k_link(param,info,key)) - { - if (param->testflag & T_VERBOSE) puts(""); - _ma_check_print_error(param,"key delete-link-chain corrupted"); - DBUG_RETURN(-1); - } + if (check_k_link(param, info, info->s->state.key_del)) + { + if (param->testflag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"key delete-link-chain corrupted"); + DBUG_RETURN(-1); + } if (!(param->testflag & T_SILENT)) puts("- check index reference"); all_keydata=all_totaldata=key_totlength=0; old_record_checksum=0; init_checksum=param->record_checksum; - if (!(share->options & - (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) - old_record_checksum=calc_checksum(info->state->records+info->state->del-1)* - share->base.pack_reclength; + if (share->data_file_type == STATIC_RECORD) + old_record_checksum= (calc_checksum(info->state->records + + info->state->del-1) * + share->base.pack_reclength); rec_per_key_part= param->rec_per_key_part; for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++) @@ -433,11 +429,10 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) result= -1; continue; } - if (found_keys - full_text_keys == 1 && - ((share->options & - (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) || - (param->testflag & T_DONT_CHECK_CHECKSUM))) - old_record_checksum=param->record_checksum; + if ((found_keys - full_text_keys == 1 && + !(share->data_file_type == STATIC_RECORD)) || + (param->testflag & T_DONT_CHECK_CHECKSUM)) + old_record_checksum= param->record_checksum; else if (old_record_checksum != param->record_checksum) { if (key) @@ -456,7 +451,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) /* Check that auto_increment key is bigger than max key value */ ulonglong auto_increment; info->lastinx=key; - _ma_read_key_record(info, 0L, info->rec_buff); + _ma_read_key_record(info, info->rec_buff, 0); auto_increment= ma_retrieve_auto_increment(info, info->rec_buff); if (auto_increment > info->s->state.auto_increment) { @@ -479,7 +474,7 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) if (!maria_rkey(info, info->rec_buff, key, (const byte*) info->lastkey, keyinfo->seg->length, HA_READ_KEY_EXACT)) { - /* Don't count this as a real warning, as mariachk can't correct it */ + /* Don't count this as a real warning, as maria_chk can't correct it */ uint save=param->warning_printed; _ma_check_print_warning(param, "Found row where the auto_increment column has the value 0"); @@ -528,21 +523,22 @@ do_stat: } /* maria_chk_key */ -static int chk_index_down(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, uchar *buff, ha_rows *keys, - ha_checksum *key_checksum, uint level) +static int chk_index_down(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) { char llbuff[22],llbuff2[22]; - if (page > info->state->key_file_length || (page & (info->s->blocksize -1))) + if (page > info->state->key_file_length || (page & (info->s->block_size -1))) { my_off_t max_length=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); _ma_check_print_error(param,"Wrong pagepointer: %s at page: %s", llstr(page,llbuff),llstr(page,llbuff2)); - if (page+info->s->blocksize > max_length) + if (page+info->s->block_size > max_length) goto err; info->state->key_file_length=(max_length & - ~ (my_off_t) (info->s->blocksize-1)); + ~ (my_off_t) (info->s->block_size-1)); } if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) { @@ -577,10 +573,10 @@ err: static void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, - uchar *key) + const byte *key) { uint first_null, kp; - first_null= ha_find_null(keyseg, key) - keyseg; + first_null= ha_find_null(keyseg, (uchar*) key) - keyseg; /* All prefix tuples that don't include keypart_{first_null} are not-null tuples (and all others aren't), increment counters for them. @@ -617,7 +613,8 @@ void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, static int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, - uchar *prev_key, uchar *last_key) + const byte *prev_key, + const byte *last_key) { uint diffs[2]; uint first_null_seg, kp; @@ -631,12 +628,12 @@ int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, last_key that is NULL or different from corresponding value in prev_key. */ - ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY, + ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs); seg= keyseg + diffs[0] - 1; /* Find first NULL in last_key */ - first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg; + first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg; for (kp= 0; kp < first_null_seg; kp++) notnull[kp]++; @@ -652,12 +649,12 @@ int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, /* Check if index is ok */ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, uchar *buff, ha_rows *keys, + my_off_t page, byte *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { int flag; uint used_length,comp_flag,nod_flag,key_length=0; - uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + byte key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; my_off_t next_page,record; char llbuff[22]; uint diff_pos[2]; @@ -668,7 +665,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (keyinfo->flag & HA_SPATIAL) DBUG_RETURN(0); - if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) { _ma_check_print_error(param,"Not enough memory for keyblock"); DBUG_RETURN(-1); @@ -698,8 +695,8 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, { if (*_ma_killed_ptr(param)) goto err; - memcpy((char*) info->lastkey,(char*) key,key_length); - info->lastkey_length=key_length; + memcpy(info->lastkey, key, key_length); + info->lastkey_length= key_length; if (nod_flag) { next_page= _ma_kpos(nod_flag,keypos); @@ -713,21 +710,24 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, break; if (keypos > endpos) { - _ma_check_print_error(param,"Wrong key block length at page: %s",llstr(page,llbuff)); + _ma_check_print_error(param,"Wrong key block length at page: %s", + llstr(page,llbuff)); goto err; } if ((*keys)++ && - (flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length, - comp_flag, diff_pos)) >=0) + (flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + key_length, comp_flag, diff_pos)) >=0) { - DBUG_DUMP("old",(byte*) info->lastkey, info->lastkey_length); - DBUG_DUMP("new",(byte*) key, key_length); - DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + DBUG_DUMP("old", info->lastkey, info->lastkey_length); + DBUG_DUMP("new", key, key_length); + DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos)); if (comp_flag & SEARCH_FIND && flag == 0) - _ma_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff)); + _ma_check_print_error(param,"Found duplicated key at page %s", + llstr(page,llbuff)); else - _ma_check_print_error(param,"Key in wrong position at page %s",llstr(page,llbuff)); + _ma_check_print_error(param,"Key in wrong position at page %s", + llstr(page,llbuff)); goto err; } if (param->testflag & T_STATISTICS) @@ -735,14 +735,14 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (*keys != 1L) /* not first_key */ { if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) - ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, - SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg, - param->notnull_count, - info->lastkey, key); + param->notnull_count, + info->lastkey, key); } param->unique_count[diff_pos[0]-1]++; } @@ -795,7 +795,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); goto err; } - param->record_checksum+=(ha_checksum) record; + param->record_checksum+= (ha_checksum) record; } if (keypos != endpos) { @@ -852,22 +852,783 @@ static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo) } /* key_length */ + +static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos, + char *buff) +{ + if (info->s->data_file_type != BLOCK_RECORD) + llstr(recpos, buff); + else + { + my_off_t page= ma_recordpos_to_page(recpos); + uint row= ma_recordpos_to_offset(recpos); + char *end= longlong10_to_str(page, buff, 10); + *(end++)= ':'; + longlong10_to_str(row, end, 10); + } +} + + +/* + Check that keys in records exist in index tree + + SYNOPSIS + check_keys_in_record() + param Check paramenter + info Maria handler + extend Type of check (extended or normal) + start_recpos Position to row + record Record buffer + + NOTES + This function also calculates record checksum & number of rows +*/ + +static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend, + my_off_t start_recpos, byte *record) +{ + MARIA_KEYDEF *keyinfo; + char llbuff[22+4]; + uint key; + + param->tmp_record_checksum+= (ha_checksum) start_recpos; + param->records++; + if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0) + { + printf("%s\r", llstr(param->records, llbuff)); + VOID(fflush(stdout)); + } + + /* Check if keys match the record */ + for (key=0, keyinfo= info->s->keyinfo; key < info->s->base.keys; + key++,keyinfo++) + { + if (maria_is_key_active(info->s->state.key_map, key)) + { + if(!(keyinfo->flag & HA_FULLTEXT)) + { + uint key_length= _ma_make_key(info,key,info->lastkey,record, + start_recpos); + if (extend) + { + /* We don't need to lock the key tree here as we don't allow + concurrent threads when running maria_chk + */ + int search_result= +#ifdef HAVE_RTREE_KEYS + (keyinfo->flag & HA_SPATIAL) ? + maria_rtree_find_first(info, key, info->lastkey, key_length, + MBR_EQUAL | MBR_DATA) : +#endif + _ma_search(info,keyinfo,info->lastkey,key_length, + SEARCH_SAME, info->s->state.key_root[key]); + if (search_result) + { + record_pos_to_txt(info, start_recpos, llbuff); + _ma_check_print_error(param,"Record at: %14s Can't find key for index: %2d", + llbuff,key+1); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + return -1; + } + } + else + param->tmp_key_crc[key]+= + maria_byte_checksum((byte*) info->lastkey, key_length); + } + } + } + return 0; +} + + +/* + Functions to loop through all rows and check if they are ok + + NOTES + One function for each record format + + RESULT + 0 ok + -1 Interrupted by user + 1 Error +*/ + +static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + + pos= 0; + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + return -1; + if (my_b_read(¶m->read_cache,(byte*) record, + info->s->base.pack_reclength)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + return 1; + } + start_recpos= pos; + pos+= info->s->base.pack_reclength; + param->splits++; + if (*record == '\0') + { + param->del_blocks++; + param->del_length+= info->s->base.pack_reclength; + continue; /* Record removed */ + } + param->glob_crc+= _ma_static_checksum(info,record); + param->used+= info->s->base.pack_reclength; + if (check_keys_in_record(param, info, extend, start_recpos, record)) + return 1; + } + return 0; +} + + +static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + MARIA_BLOCK_INFO block_info; + my_off_t start_recpos, start_block, pos; + byte *to; + ulong left_length; + uint b_type; + char llbuff[22],llbuff2[22],llbuff3[22]; + DBUG_ENTER("check_dynamic_record"); + + pos= 0; + while (pos < info->state->data_file_length) + { + my_bool got_error= 0; + int flag; + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + flag= block_info.second_read=0; + block_info.next_filepos=pos; + do + { + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, + (start_block=block_info.next_filepos), + sizeof(block_info.header), + (flag ? 0 : READING_NEXT) | READING_HEADER)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(start_block, llbuff)); + DBUG_RETURN(1); + } + + if (start_block & (MARIA_DYN_ALIGN_SIZE-1)) + { + _ma_check_print_error(param,"Wrong aligned block at %s", + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + b_type= _ma_get_block_info(&block_info,-1,start_block); + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & BLOCK_SYNC_ERROR) + { + if (flag) + { + _ma_check_print_error(param,"Unexpected byte: %d at link: %s", + (int) block_info.header[0], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + pos=block_info.filepos+block_info.block_len; + goto next; + } + if (b_type & BLOCK_DELETED) + { + if (block_info.block_len < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Deleted block with impossible length %lu at %s", + block_info.block_len,llstr(pos,llbuff)); + DBUG_RETURN(1); + } + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + _ma_check_print_error(param,"Delete link points outside datafile at %s", + llstr(pos,llbuff)); + DBUG_RETURN(1); + } + param->del_blocks++; + param->del_length+= block_info.block_len; + param->splits++; + pos= block_info.filepos+block_info.block_len; + goto next; + } + _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", + block_info.header[0],block_info.header[1], + block_info.header[2], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + if (info->state->data_file_length < block_info.filepos+ + block_info.block_len) + { + _ma_check_print_error(param, + "Recordlink that points outside datafile at %s", + llstr(pos,llbuff)); + got_error=1; + break; + } + param->splits++; + if (!flag++) /* First block */ + { + start_recpos=pos; + pos=block_info.filepos+block_info.block_len; + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (info->s->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + + { + _ma_check_print_error(param, + "Not enough memory (%lu) for blob at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + } + to= info->rec_buff; + left_length= block_info.rec_len; + } + if (left_length < block_info.data_len) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.data_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (_ma_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, + (uint) block_info.data_len, + flag == 1 ? READING_NEXT : 0)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff)); + + DBUG_RETURN(1); + } + to+=block_info.data_len; + param->link_used+= block_info.filepos-start_block; + param->used+= block_info.filepos - start_block + block_info.data_len; + param->empty+= block_info.block_len-block_info.data_len; + left_length-= block_info.data_len; + if (left_length) + { + if (b_type & BLOCK_LAST) + { + _ma_check_print_error(param, + "Wrong record length %s of %s at %s", + llstr(block_info.rec_len-left_length,llbuff), + llstr(block_info.rec_len, llbuff2), + llstr(start_recpos,llbuff3)); + got_error=1; + break; + } + if (info->state->data_file_length < block_info.next_filepos) + { + _ma_check_print_error(param, + "Found next-recordlink that points outside datafile at %s", + llstr(block_info.filepos,llbuff)); + got_error=1; + break; + } + } + } while (left_length); + + if (! got_error) + { + if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) == + MY_FILE_ERROR) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + else + { + info->cur_row.checksum= _ma_checksum(info,record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) + { + if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, + test(info->s->calc_checksum))) + { + _ma_check_print_error(param,"Found wrong packed record at %s", + llstr(start_recpos,llbuff)); + got_error= 1; + } + } + param->glob_crc+= info->cur_row.checksum; + } + + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + else if (!flag) + pos= block_info.filepos+block_info.block_len; +next:; + } + DBUG_RETURN(0); +} + + +static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + bool got_error= 0; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("check_compressed_record"); + + pos= info->s->pack.header_length; /* Skip header */ + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, pos, + info->s->pack.ref_length, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + DBUG_RETURN(1); + } + + start_recpos= pos; + param->splits++; + VOID(_ma_pack_get_block_info(info,&block_info, -1, start_recpos)); + pos=block_info.filepos+block_info.rec_len; + if (block_info.rec_len < (uint) info->s->min_pack_length || + block_info.rec_len > (uint) info->s->max_pack_length) + { + _ma_check_print_error(param, + "Found block with wrong recordlength: %d at %s", + block_info.rec_len, llstr(start_recpos,llbuff)); + got_error=1; + goto end; + } + if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(block_info.filepos, llbuff)); + DBUG_RETURN(1); + } + if (_ma_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + goto end; + } + param->glob_crc+= (*info->s->calc_checksum)(info,record); + param->link_used+= (block_info.filepos - start_recpos); + param->used+= (pos-start_recpos); + +end: + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + got_error= 0; /* Reset for next loop */ + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Check if layout on a page is ok +*/ + +static int check_page_layout(HA_CHECK *param, MARIA_HA *info, + my_off_t page_pos, byte *page, + uint row_count, uint head_empty, + uint *real_rows_found) +{ + uint empty, last_row_end, row, first_dir_entry; + byte *dir_entry; + char llbuff[22]; + DBUG_ENTER("check_page_layout"); + + empty= 0; + last_row_end= PAGE_HEADER_SIZE; + *real_rows_found= 0; + + dir_entry= page+ info->s->block_size - PAGE_SUFFIX_SIZE; + first_dir_entry= info->s->block_size - row_count* DIR_ENTRY_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + { + if (row == row_count -1) + { + _ma_check_print_error(param, + "Page %9s: First entry in directory is 0", + llstr(page_pos, llbuff)); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + continue; /* Deleted row */ + } + (*real_rows_found)++; + length= uint2korr(dir_entry+2); + param->used+= length; + if (pos < last_row_end) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with previous row", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + empty+= (pos - last_row_end); + last_row_end= pos + length; + if (last_row_end > first_dir_entry) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with directory", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + } + empty+= (first_dir_entry - last_row_end); + + if (empty != head_empty) + { + _ma_check_print_error(param, + "Page %9s: Wrong empty size. Stored: %5u Actual: %5u", + llstr(page_pos, llbuff), head_empty, empty); + DBUG_RETURN(param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)); + } + DBUG_RETURN(0); +} + + +/* + Check all rows on head page + + NOTES + Before this, we have already called check_page_layout(), so + we know the block is logicaly correct (even if the rows may not be that) + + RETURN + 0 ok + 1 error +*/ + + +static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record, + int extend, my_off_t page_pos, byte *page_buff, + uint row_count) +{ + byte *dir_entry; + uint row; + char llbuff[22], llbuff2[22]; + DBUG_ENTER("check_head_page"); + + dir_entry= page_buff+ info->s->block_size - PAGE_SUFFIX_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length, flag; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + continue; + length= uint2korr(dir_entry+2); + if (length < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Page %9s: Row %3u is too short (%d bytes)", + llstr(page_pos, llbuff), row, length); + DBUG_RETURN(1); + } + flag= (uint) (uchar) page_buff[pos]; + if (flag & ~(ROW_FLAG_ALL)) + _ma_check_print_error(param, + "Page %9s: Row %3u has wrong flag: %d", + llstr(page_pos, llbuff), row, flag); + + DBUG_PRINT("info", ("rowid: %s page: %lu row: %u", + llstr(ma_recordpos(page_pos/info->s->block_size, row), + llbuff), + (ulong) (page_pos / info->s->block_size), row)); + if (_ma_read_block_record2(info, record, page_buff+pos, + page_buff+pos+length)) + { + _ma_check_print_error(param, + "Page %9s: Row %3d is crashed", + llstr(page_pos, llbuff), row); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + continue; + } + if (info->s->calc_checksum) + { + info->cur_row.checksum= _ma_checksum(info, record); + param->glob_crc+= info->cur_row.checksum; + } + if (info->cur_row.extents_count) + { + byte *extents= info->cur_row.extents; + uint i; + /* Check that bitmap has the right marker for the found extents */ + for (i= 0 ; i < info->cur_row.extents_count ; i++) + { + uint page, page_count, page_type; + page= uint5korr(extents); + page_count= uint2korr(extents+5); + extents+= ROW_EXTENT_SIZE; + page_type= BLOB_PAGE; + if (page_count & TAIL_BIT) + { + page_count= 1; + page_type= TAIL_PAGE; + } + for ( ; page_count--; page++) + { + uint bitmap_pattern; + if (_ma_check_if_right_bitmap_type(info, page_type, page, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Row: %3d has an extent with wrong information in bitmap: Page %9s Page_type: %d Bitmap: %d", + llstr(page_pos, llbuff), row, + llstr(page * info->s->bitmap.block_size, + llbuff2), + page_type, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + } + } + param->full_page_count+= info->cur_row.full_page_count; + param->tail_count+= info->cur_row.tail_count; + if (check_keys_in_record(param, info, extend, + ma_recordpos(page_pos/info->s->block_size, row), + record)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + + +static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t pos; + byte *page_buff, *bitmap_buff, *data; + char llbuff[22], llbuff2[22]; + uint block_size= info->s->block_size; + ha_rows full_page_count, tail_count; + my_bool full_dir; + uint offset_page, offset; + + if (_ma_scan_init_block_record(info)) + { + _ma_check_print_error(param, "got error %d when initializing scan", + my_errno); + return 1; + } + bitmap_buff= info->scan.bitmap_buff; + page_buff= info->scan.page_buff; + full_page_count= tail_count= 0; + param->full_page_count= param->tail_count= 0; + param->used= param->link_used= 0; + + for (pos= 0; + pos < info->state->data_file_length; + pos+= block_size) + { + uint row_count, real_row_count, empty_space, page_type, bitmap_pattern; + LINT_INIT(row_count); + LINT_INIT(empty_space); + + if (*_ma_killed_ptr(param)) + { + _ma_scan_end_block_record(info); + return -1; + } + if (((pos / block_size) % info->s->bitmap.pages_covered) == 0) + { + /* Bitmap page */ + if (_ma_read_cache(¶m->read_cache, bitmap_buff, pos, + block_size, READING_NEXT)) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + param->used+= block_size; + param->link_used+= block_size; + continue; + } + /* Skip pages marked as empty in bitmap */ + offset_page= (((pos / block_size) % info->s->bitmap.pages_covered) -1) * 3; + offset= offset_page & 7; + data= bitmap_buff + offset_page / 8; + bitmap_pattern= uint2korr(data); + param->splits++; + if (!((bitmap_pattern >> offset) & 7)) + { + param->empty+= block_size; + param->del_blocks++; + continue; + } + + if (_ma_read_cache(¶m->read_cache, page_buff, pos, + block_size, READING_NEXT)) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK; + if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_error(param, + "Page %9s: Found wrong page type %d\n", + llstr(pos, llbuff), page_type); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + switch ((enum en_page_type) page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + DBUG_PRINT("warning", + ("Found page with wrong page type: %d", page_type)); + DBUG_ASSERT(0); + break; + case HEAD_PAGE: + row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case TAIL_PAGE: + row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case BLOB_PAGE: + full_page_count++; + full_dir= 0; + empty_space= block_size; /* for error reporting */ + param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE); + param->used+= block_size; + break; + } + if (_ma_check_bitmap_data(info, page_type, pos / block_size, + full_dir ? 0 : empty_space, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Wrong data in bitmap. Page_type: %d empty_space: %u Bitmap: %d", + llstr(pos, llbuff), page_type, empty_space, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + if ((enum en_page_type) page_type == BLOB_PAGE) + continue; + param->empty+= empty_space; + if (check_page_layout(param, info, pos, page_buff, row_count, + empty_space, &real_row_count)) + goto err; + if ((enum en_page_type) page_type == TAIL_PAGE) + { + tail_count+= real_row_count; + continue; + } + if (check_head_page(param, info, record, extend, pos, page_buff, + row_count)) + goto err; + } + + _ma_scan_end_block_record(info); + + if (full_page_count != param->full_page_count) + _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table", + llstr(param->full_page_count, llbuff), + llstr(full_page_count, llbuff2)); + if (tail_count != param->tail_count) + _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table", + llstr(param->tail_count, llbuff), + llstr(tail_count, llbuff2)); + + /* Update splits to avoid warning */ + info->s->state.split= param->splits; + info->state->del= param->del_blocks; + return param->error_printed != 0; + +err: + _ma_scan_end_block_record(info); + return 1; +} + + /* Check that record-link is ok */ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) { - int error,got_error,flag; - uint key,left_length,b_type,field; - ha_rows records,del_blocks; - my_off_t used,empty,pos,splits,start_recpos, - del_length,link_used,start_block; - byte *record,*to; + int error; + byte *record; char llbuff[22],llbuff2[22],llbuff3[22]; - ha_checksum intern_record_checksum; - ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; - my_bool static_row_size; - MARIA_KEYDEF *keyinfo; - MARIA_BLOCK_INFO block_info; DBUG_ENTER("maria_chk_data_link"); if (!(param->testflag & T_SILENT)) @@ -883,324 +1644,52 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) _ma_check_print_error(param,"Not enough memory for record"); DBUG_RETURN(-1); } - records=del_blocks=0; - used=link_used=splits=del_length=0; - intern_record_checksum=param->glob_crc=0; + param->records= param->del_blocks= 0; + param->used= param->link_used= param->splits= param->del_length= 0; + param->tmp_record_checksum= param->glob_crc= 0; + param->err_count= 0; LINT_INIT(left_length); LINT_INIT(start_recpos); LINT_INIT(to); - got_error=error=0; - empty=info->s->pack.header_length; + error= 0; + param->empty= info->s->pack.header_length; - /* Check how to calculate checksum of rows */ - static_row_size=1; - if (info->s->data_file_type == COMPRESSED_RECORD) - { - for (field=0 ; field < info->s->base.fields ; field++) - { - if (info->s->rec[field].base_type == FIELD_BLOB || - info->s->rec[field].base_type == FIELD_VARCHAR) - { - static_row_size=0; - break; - } - } - } + bzero((char*) param->tmp_key_crc, + info->s->base.keys * sizeof(param->tmp_key_crc[0])); + + switch (info->s->data_file_type) { + case BLOCK_RECORD: + error= check_block_record(param, info, extend, record); + break; + case STATIC_RECORD: + error= check_static_record(param, info, extend, record); + break; + case DYNAMIC_RECORD: + error= check_dynamic_record(param, info, extend, record); + break; + case COMPRESSED_RECORD: + error= check_compressed_record(param, info, extend, record); + break; + } /* switch */ - pos=my_b_tell(¶m->read_cache); - bzero((char*) key_checksum, info->s->base.keys * sizeof(key_checksum[0])); - while (pos < info->state->data_file_length) - { - if (*_ma_killed_ptr(param)) - goto err2; - switch (info->s->data_file_type) { - case STATIC_RECORD: - if (my_b_read(¶m->read_cache,(byte*) record, - info->s->base.pack_reclength)) - goto err; - start_recpos=pos; - pos+=info->s->base.pack_reclength; - splits++; - if (*record == '\0') - { - del_blocks++; - del_length+=info->s->base.pack_reclength; - continue; /* Record removed */ - } - param->glob_crc+= _ma_static_checksum(info,record); - used+=info->s->base.pack_reclength; - break; - case DYNAMIC_RECORD: - flag=block_info.second_read=0; - block_info.next_filepos=pos; - do - { - if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, - (start_block=block_info.next_filepos), - sizeof(block_info.header), - (flag ? 0 : READING_NEXT) | READING_HEADER)) - goto err; - if (start_block & (MARIA_DYN_ALIGN_SIZE-1)) - { - _ma_check_print_error(param,"Wrong aligned block at %s", - llstr(start_block,llbuff)); - goto err2; - } - b_type= _ma_get_block_info(&block_info,-1,start_block); - if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | - BLOCK_FATAL_ERROR)) - { - if (b_type & BLOCK_SYNC_ERROR) - { - if (flag) - { - _ma_check_print_error(param,"Unexpected byte: %d at link: %s", - (int) block_info.header[0], - llstr(start_block,llbuff)); - goto err2; - } - pos=block_info.filepos+block_info.block_len; - goto next; - } - if (b_type & BLOCK_DELETED) - { - if (block_info.block_len < info->s->base.min_block_length) - { - _ma_check_print_error(param, - "Deleted block with impossible length %lu at %s", - block_info.block_len,llstr(pos,llbuff)); - goto err2; - } - if ((block_info.next_filepos != HA_OFFSET_ERROR && - block_info.next_filepos >= info->state->data_file_length) || - (block_info.prev_filepos != HA_OFFSET_ERROR && - block_info.prev_filepos >= info->state->data_file_length)) - { - _ma_check_print_error(param,"Delete link points outside datafile at %s", - llstr(pos,llbuff)); - goto err2; - } - del_blocks++; - del_length+=block_info.block_len; - pos=block_info.filepos+block_info.block_len; - splits++; - goto next; - } - _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", - block_info.header[0],block_info.header[1], - block_info.header[2], - llstr(start_block,llbuff)); - goto err2; - } - if (info->state->data_file_length < block_info.filepos+ - block_info.block_len) - { - _ma_check_print_error(param, - "Recordlink that points outside datafile at %s", - llstr(pos,llbuff)); - got_error=1; - break; - } - splits++; - if (!flag++) /* First block */ - { - start_recpos=pos; - pos=block_info.filepos+block_info.block_len; - if (block_info.rec_len > (uint) info->s->base.max_pack_length) - { - _ma_check_print_error(param,"Found too long record (%lu) at %s", - (ulong) block_info.rec_len, - llstr(start_recpos,llbuff)); - got_error=1; - break; - } - if (info->s->base.blobs) - { - if (!(to= _ma_alloc_rec_buff(info, block_info.rec_len, - &info->rec_buff))) - { - _ma_check_print_error(param, - "Not enough memory (%lu) for blob at %s", - (ulong) block_info.rec_len, - llstr(start_recpos,llbuff)); - got_error=1; - break; - } - } - else - to= info->rec_buff; - left_length=block_info.rec_len; - } - if (left_length < block_info.data_len) - { - _ma_check_print_error(param,"Found too long record (%lu) at %s", - (ulong) block_info.data_len, - llstr(start_recpos,llbuff)); - got_error=1; - break; - } - if (_ma_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, - (uint) block_info.data_len, - flag == 1 ? READING_NEXT : 0)) - goto err; - to+=block_info.data_len; - link_used+= block_info.filepos-start_block; - used+= block_info.filepos - start_block + block_info.data_len; - empty+=block_info.block_len-block_info.data_len; - left_length-=block_info.data_len; - if (left_length) - { - if (b_type & BLOCK_LAST) - { - _ma_check_print_error(param, - "Wrong record length %s of %s at %s", - llstr(block_info.rec_len-left_length,llbuff), - llstr(block_info.rec_len, llbuff2), - llstr(start_recpos,llbuff3)); - got_error=1; - break; - } - if (info->state->data_file_length < block_info.next_filepos) - { - _ma_check_print_error(param, - "Found next-recordlink that points outside datafile at %s", - llstr(block_info.filepos,llbuff)); - got_error=1; - break; - } - } - } while (left_length); - if (! got_error) - { - if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) == - MY_FILE_ERROR) - { - _ma_check_print_error(param,"Found wrong record at %s", - llstr(start_recpos,llbuff)); - got_error=1; - } - else - { - info->checksum=_ma_checksum(info,record); - if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) - { - if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, - test(info->s->calc_checksum))) - { - _ma_check_print_error(param,"Found wrong packed record at %s", - llstr(start_recpos,llbuff)); - got_error=1; - } - } - if (!got_error) - param->glob_crc+= info->checksum; - } - } - else if (!flag) - pos=block_info.filepos+block_info.block_len; - break; - case COMPRESSED_RECORD: - if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, pos, - info->s->pack.ref_length, READING_NEXT)) - goto err; - start_recpos=pos; - splits++; - VOID(_ma_pack_get_block_info(info,&block_info, -1, start_recpos)); - pos=block_info.filepos+block_info.rec_len; - if (block_info.rec_len < (uint) info->s->min_pack_length || - block_info.rec_len > (uint) info->s->max_pack_length) - { - _ma_check_print_error(param, - "Found block with wrong recordlength: %d at %s", - block_info.rec_len, llstr(start_recpos,llbuff)); - got_error=1; - break; - } - if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, - block_info.filepos, block_info.rec_len, READING_NEXT)) - goto err; - if (_ma_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) - { - _ma_check_print_error(param,"Found wrong record at %s", - llstr(start_recpos,llbuff)); - got_error=1; - } - if (static_row_size) - param->glob_crc+= _ma_static_checksum(info,record); - else - param->glob_crc+= _ma_checksum(info,record); - link_used+= (block_info.filepos - start_recpos); - used+= (pos-start_recpos); - } /* switch */ - if (! got_error) - { - intern_record_checksum+=(ha_checksum) start_recpos; - records++; - if (param->testflag & T_WRITE_LOOP && records % WRITE_COUNT == 0) - { - printf("%s\r", llstr(records,llbuff)); VOID(fflush(stdout)); - } + if (error) + goto err; - /* Check if keys match the record */ - - for (key=0,keyinfo= info->s->keyinfo; key < info->s->base.keys; - key++,keyinfo++) - { - if (maria_is_key_active(info->s->state.key_map, key)) - { - if(!(keyinfo->flag & HA_FULLTEXT)) - { - uint key_length= _ma_make_key(info,key,info->lastkey,record, - start_recpos); - if (extend) - { - /* We don't need to lock the key tree here as we don't allow - concurrent threads when running mariachk - */ - int search_result= -#ifdef HAVE_RTREE_KEYS - (keyinfo->flag & HA_SPATIAL) ? - maria_rtree_find_first(info, key, info->lastkey, key_length, - MBR_EQUAL | MBR_DATA) : -#endif - _ma_search(info,keyinfo,info->lastkey,key_length, - SEARCH_SAME, info->s->state.key_root[key]); - if (search_result) - { - _ma_check_print_error(param,"Record at: %10s Can't find key for index: %2d", - llstr(start_recpos,llbuff),key+1); - if (error++ > MAXERR || !(param->testflag & T_VERBOSE)) - goto err2; - } - } - else - key_checksum[key]+=maria_byte_checksum((byte*) info->lastkey, - key_length); - } - } - } - } - else - { - got_error=0; - if (error++ > MAXERR || !(param->testflag & T_VERBOSE)) - goto err2; - } - next:; /* Next record */ - } if (param->testflag & T_WRITE_LOOP) { VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); } - if (records != info->state->records) + if (param->records != info->state->records) { - _ma_check_print_error(param,"Record-count is not ok; is %-10s Should be: %s", - llstr(records,llbuff), llstr(info->state->records,llbuff2)); + _ma_check_print_error(param, + "Record-count is not ok; found %-10s Should be: %s", + llstr(param->records,llbuff), + llstr(info->state->records,llbuff2)); error=1; } else if (param->record_checksum && - param->record_checksum != intern_record_checksum) + param->record_checksum != param->tmp_record_checksum) { _ma_check_print_error(param, - "Keypointers and record positions doesn't match"); + "Key pointers and record positions doesn't match"); error=1; } else if (param->glob_crc != info->state->checksum && @@ -1213,9 +1702,10 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) } else if (!extend) { + uint key; for (key=0 ; key < info->s->base.keys; key++) { - if (key_checksum[key] != param->key_crc[key] && + if (param->tmp_key_crc[key] != param->key_crc[key] && !(info->s->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL))) { _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records", @@ -1225,68 +1715,83 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) } } - if (del_length != info->state->empty) + if (param->del_length != info->state->empty) { _ma_check_print_warning(param, "Found %s deleted space. Should be %s", - llstr(del_length,llbuff2), + llstr(param->del_length,llbuff2), llstr(info->state->empty,llbuff)); } - if (used+empty+del_length != info->state->data_file_length) + if (param->used + param->empty + param->del_length != + info->state->data_file_length) { _ma_check_print_warning(param, - "Found %s record-data and %s unused data and %s deleted-data", - llstr(used,llbuff),llstr(empty,llbuff2), - llstr(del_length,llbuff3)); + "Found %s record data and %s unused data and %s deleted data", + llstr(param->used, llbuff), + llstr(param->empty,llbuff2), + llstr(param->del_length,llbuff3)); _ma_check_print_warning(param, - "Total %s, Should be: %s", - llstr((used+empty+del_length),llbuff), + "Total %s Should be: %s", + llstr((param->used+param->empty+param->del_length), + llbuff), llstr(info->state->data_file_length,llbuff2)); } - if (del_blocks != info->state->del) + if (param->del_blocks != info->state->del) { _ma_check_print_warning(param, "Found %10s deleted blocks Should be: %s", - llstr(del_blocks,llbuff), + llstr(param->del_blocks,llbuff), llstr(info->state->del,llbuff2)); } - if (splits != info->s->state.split) + if (param->splits != info->s->state.split) { _ma_check_print_warning(param, "Found %10s parts Should be: %s parts", - llstr(splits,llbuff), + llstr(param->splits, llbuff), llstr(info->s->state.split,llbuff2)); } if (param->testflag & T_INFO) { if (param->warning_printed || param->error_printed) puts(""); - if (used != 0 && ! param->error_printed) + if (param->used != 0 && ! param->error_printed) { - printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n", - llstr(records,llbuff), (long)((used-link_used)/records), - (info->s->base.blobs ? 0.0 : - (ulonglong2double((ulonglong) info->s->base.reclength*records)- - my_off_t2double(used))/ - ulonglong2double((ulonglong) info->s->base.reclength*records)*100.0)); - printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n", - (ulonglong2double(used-link_used)/ulonglong2double(used-link_used+empty)*100.0), - (!records ? 100 : (int) (ulonglong2double(del_length+empty)/ - my_off_t2double(used)*100.0)), - ulonglong2double(splits - del_blocks) / records); + if (param->records) + { + printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n", + llstr(param->records,llbuff), + (long)((param->used - param->link_used)/param->records), + (info->s->base.blobs ? 0.0 : + (ulonglong2double((ulonglong) info->s->base.reclength * + param->records)- + my_off_t2double(param->used))/ + ulonglong2double((ulonglong) info->s->base.reclength * + param->records)*100.0)); + printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n", + (ulonglong2double(param->used - param->link_used)/ + ulonglong2double(param->used-param->link_used+param->empty)*100.0), + (!param->records ? 100 : + (int) (ulonglong2double(param->del_length+param->empty)/ + my_off_t2double(param->used)*100.0)), + ulonglong2double(param->splits - param->del_blocks) / + param->records); + } + else + printf("Records:%18s\n", "0"); } printf("Record blocks:%12s Delete blocks:%10s\n", - llstr(splits-del_blocks,llbuff),llstr(del_blocks,llbuff2)); + llstr(param->splits - param->del_blocks, llbuff), + llstr(param->del_blocks, llbuff2)); printf("Record data: %12s Deleted data: %10s\n", - llstr(used-link_used,llbuff),llstr(del_length,llbuff2)); + llstr(param->used - param->link_used,llbuff), + llstr(param->del_length, llbuff2)); printf("Lost space: %12s Linkdata: %10s\n", - llstr(empty,llbuff),llstr(link_used,llbuff2)); + llstr(param->empty, llbuff),llstr(param->link_used, llbuff2)); } my_free((gptr) record,MYF(0)); DBUG_RETURN (error); + err: - _ma_check_print_error(param,"got error: %d when reading datafile at record: %s",my_errno, llstr(records,llbuff)); - err2: my_free((gptr) record,MYF(0)); param->testflag|=T_RETRY_WITHOUT_QUICK; DBUG_RETURN(1); @@ -1297,7 +1802,7 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) /* Save new datafile-name in temp_filename */ int maria_repair(HA_CHECK *param, register MARIA_HA *info, - my_string name, int rep_quick) + my_string name, int rep_quick) { int error,got_error; uint i; @@ -1348,7 +1853,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, info->opt_flag|=WRITE_CACHE_USED; if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, MYF(0))) || - !_ma_alloc_rec_buff(info, -1, &sort_param.rec_buff)) + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) { _ma_check_print_error(param, "Not enough memory for extra record"); goto err; @@ -1357,14 +1863,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file=my_raid_create(fn_format(param->temp_filename, - share->data_file_name, "", - DATA_TMP_EXT, 2+4), - 0,param->tmpfile_createflag, - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize, - MYF(0))) < 0) + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -1376,10 +1879,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, info->s->state.dellink= HA_OFFSET_ERROR; info->rec_cache.file=new_file; if (param->testflag & T_UNPACK) - { - share->options&= ~HA_OPTION_COMPRESS_RECORD; - mi_int2store(share->state.header.options,share->options); - } + restore_data_file_type(share); } sort_info.info=info; sort_info.param = param; @@ -1411,8 +1911,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, share->state.key_root[i]= HA_OFFSET_ERROR; /* Drop the delete chain. */ - for (i=0 ; i < share->state.header.max_block_size_index ; i++) - share->state.key_del[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; /* If requested, activate (enable) all keys in key_map. In this case, @@ -1436,7 +1935,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, _ma_check_print_info(param,"Duplicate key %2d for record at %10s against new record at %10s", info->errkey+1, llstr(sort_param.start_recpos,llbuff), - llstr(info->dupp_key_pos,llbuff2)); + llstr(info->dup_key_pos,llbuff2)); if (param->testflag & T_VERBOSE) { VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey, @@ -1530,8 +2029,7 @@ err: my_close(new_file,MYF(0)); info->dfile=new_file= -1; if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, share->base.raid_chunks, - (param->testflag & T_BACKUP_DATA ? + DATA_TMP_EXT, (param->testflag & T_BACKUP_DATA ? MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || _ma_open_datafile(info,share,-1)) got_error=1; @@ -1545,26 +2043,20 @@ err: if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); - VOID(my_raid_delete(param->temp_filename,info->s->base.raid_chunks, - MYF(MY_WME))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); info->rec_cache.file=-1; /* don't flush data to new_file, it's closed */ } maria_mark_crashed_on_repair(info); } - my_free(_ma_get_rec_buff_ptr(info, sort_param.rec_buff), - MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); VOID(end_io_cache(¶m->read_cache)); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); VOID(end_io_cache(&info->rec_cache)); got_error|=_ma_flush_blocks(param, share->key_cache, share->kfile); - if (!got_error && param->testflag & T_UNPACK) - { - share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; - share->pack.header_length=0; - share->data_file_type=sort_info.new_data_file_type; - } + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES | STATE_NOT_ANALYZED); DBUG_RETURN(got_error); @@ -1577,10 +2069,10 @@ static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, my_off_t filepos) { register uint i; - uchar *key; + byte *key; DBUG_ENTER("writekeys"); - key=info->lastkey+info->s->base.max_key_length; + key= info->lastkey+info->s->base.max_key_length; for (i=0 ; i < info->s->base.keys ; i++) { if (maria_is_key_active(info->s->state.key_map, i)) @@ -1632,7 +2124,7 @@ static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, } /* Remove checksum that was added to glob_crc in sort_get_next_record */ if (param->calc_checksum) - param->glob_crc-= info->checksum; + param->glob_crc-= info->cur_row.checksum; DBUG_PRINT("error",("errno: %d",my_errno)); DBUG_RETURN(-1); } /* writekeys */ @@ -1640,15 +2132,16 @@ static int writekeys(HA_CHECK *param, register MARIA_HA *info, byte *buff, /* Change all key-pointers that points to a records */ -int maria_movepoint(register MARIA_HA *info, byte *record, my_off_t oldpos, - my_off_t newpos, uint prot_key) +int maria_movepoint(register MARIA_HA *info, byte *record, + MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos, + uint prot_key) { register uint i; - uchar *key; + byte *key; uint key_length; DBUG_ENTER("maria_movepoint"); - key=info->lastkey+info->s->base.max_key_length; + key= info->lastkey+info->s->base.max_key_length; for (i=0 ; i < info->s->base.keys; i++) { if (i != prot_key && maria_is_key_active(info->s->state.key_map, i)) @@ -1764,7 +2257,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) index_pos[key]= HA_OFFSET_ERROR; /* No blocks */ } - /* Flush key cache for this file if we are calling this outside mariachk */ + /* Flush key cache for this file if we are calling this outside maria_chk */ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); share->state.version=(ulong) time((time_t*) 0); @@ -1779,8 +2272,8 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) VOID(my_close(share->kfile,MYF(MY_WME))); share->kfile = -1; VOID(my_close(new_file,MYF(MY_WME))); - if (maria_change_to_newfile(share->index_file_name,MARIA_NAME_IEXT,INDEX_TMP_EXT,0, - MYF(0)) || + if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT, + INDEX_TMP_EXT, MYF(0)) || _ma_open_keyfile(share)) goto err2; info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */ @@ -1795,8 +2288,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); for (key=0 ; key < info->s->base.keys ; key++) info->s->state.key_root[key]=index_pos[key]; - for (key=0 ; key < info->s->state.header.max_block_size_index ; key++) - info->s->state.key_del[key]= HA_OFFSET_ERROR; + info->s->state.key_del= HA_OFFSET_ERROR; info->s->state.changed&= ~STATE_NOT_SORTED_PAGES; DBUG_RETURN(0); @@ -1811,12 +2303,13 @@ err2: /* Sort records recursive using one index */ -static int sort_one_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, my_off_t pagepos, File new_file) { uint length,nod_flag,used_length, key_length; - uchar *buff,*keypos,*endpos; - uchar key[HA_MAX_POSSIBLE_KEY_BUFF]; + byte *buff,*keypos,*endpos; + byte key[HA_MAX_POSSIBLE_KEY_BUFF]; my_off_t new_page_pos,next_page; char llbuff[22]; DBUG_ENTER("sort_one_index"); @@ -1824,7 +2317,7 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo new_page_pos=param->new_file_pos; param->new_file_pos+=keyinfo->block_length; - if (!(buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + if (!(buff= (byte*) my_alloca((uint) keyinfo->block_length))) { _ma_check_print_error(param,"Not enough memory for key block"); DBUG_RETURN(-1); @@ -1907,9 +2400,7 @@ err: */ int maria_change_to_newfile(const char * filename, const char * old_ext, - const char * new_ext, - uint raid_chunks __attribute__((unused)), - myf MyFlags) + const char * new_ext, myf MyFlags) { char old_filename[FN_REFLEN],new_filename[FN_REFLEN]; #ifdef USE_RAID @@ -1981,7 +2472,7 @@ err: */ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, - const char * name, int rep_quick) + const char * name, int rep_quick) { int got_error; uint i; @@ -2034,7 +2525,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, MYF(0))) || - !_ma_alloc_rec_buff(info, -1, &sort_param.rec_buff)) + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) { _ma_check_print_error(param, "Not enough memory for extra record"); goto err; @@ -2042,14 +2534,11 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file=my_raid_create(fn_format(param->temp_filename, - share->data_file_name, "", - DATA_TMP_EXT, 2+4), - 0,param->tmpfile_createflag, - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize, - MYF(0))) < 0) + if ((new_file=my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -2059,10 +2548,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, "datafile-header")) goto err; if (param->testflag & T_UNPACK) - { - share->options&= ~HA_OPTION_COMPRESS_RECORD; - mi_int2store(share->state.header.options,share->options); - } + restore_data_file_type(share); share->state.dellink= HA_OFFSET_ERROR; info->rec_cache.file=new_file; } @@ -2072,14 +2558,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, { /* Flush key cache for this file if we are calling this outside - mariachk + maria_chk */ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; - for (i=0 ; i < share->state.header.max_block_size_index ; i++) - share->state.key_del[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; info->state->key_file_length=share->base.keystart; } else @@ -2191,13 +2676,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, 10*param->sort_buffer_length/sort_param.key_length; } - sort_param.key_read=sort_maria_ft_key_read; - sort_param.key_write=sort_maria_ft_key_write; + sort_param.key_read= sort_maria_ft_key_read; + sort_param.key_write= sort_maria_ft_key_write; } else { - sort_param.key_read=sort_key_read; - sort_param.key_write=sort_key_write; + sort_param.key_read= sort_key_read; + sort_param.key_write= sort_key_write; } if (_ma_create_index_by_sort(&sort_param, @@ -2276,7 +2761,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif - if (skr != sort_info.filelength && !info->s->base.raid_type) + if (skr != sort_info.filelength) if (my_chsize(info->dfile,skr,0,MYF(0))) _ma_check_print_warning(param, "Can't change size of datafile, error: %d", @@ -2315,9 +2800,9 @@ err: my_close(new_file,MYF(0)); info->dfile=new_file= -1; if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, share->base.raid_chunks, - (param->testflag & T_BACKUP_DATA ? - MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || _ma_open_datafile(info,share,-1)) got_error=1; } @@ -2329,8 +2814,7 @@ err: if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); - VOID(my_raid_delete(param->temp_filename,share->base.raid_chunks, - MYF(MY_WME))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); if (info->dfile == new_file) info->dfile= -1; } @@ -2340,8 +2824,7 @@ err: share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; share->state.changed|=STATE_NOT_SORTED_PAGES; - my_free(_ma_get_rec_buff_ptr(info, sort_param.rec_buff), - MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); @@ -2349,10 +2832,7 @@ err: VOID(end_io_cache(¶m->read_cache)); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); if (!got_error && (param->testflag & T_UNPACK)) - { - share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; - share->pack.header_length=0; - } + restore_data_file_type(share); DBUG_RETURN(got_error); } @@ -2435,15 +2915,12 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { /* Get real path for data file */ - if ((new_file=my_raid_create(fn_format(param->temp_filename, - share->data_file_name, "", - DATA_TMP_EXT, - 2+4), - 0,param->tmpfile_createflag, - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize, - MYF(0))) < 0) + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, + 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", param->temp_filename); @@ -2453,10 +2930,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, "datafile-header")) goto err; if (param->testflag & T_UNPACK) - { - share->options&= ~HA_OPTION_COMPRESS_RECORD; - mi_int2store(share->state.header.options,share->options); - } + restore_data_file_type(share); share->state.dellink= HA_OFFSET_ERROR; info->rec_cache.file=new_file; } @@ -2466,14 +2940,13 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, { /* Flush key cache for this file if we are calling this outside - mariachk + maria_chk */ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); /* Clear the pointers to the given rows */ for (i=0 ; i < share->base.keys ; i++) share->state.key_root[i]= HA_OFFSET_ERROR; - for (i=0 ; i < share->state.header.max_block_size_index ; i++) - share->state.key_del[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; info->state->key_file_length=share->base.keystart; } else @@ -2573,12 +3046,12 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, sort_param[i].record= (((char *)(sort_param+share->base.keys))+ (share->base.pack_reclength * i)); - if (!_ma_alloc_rec_buff(info, -1, &sort_param[i].rec_buff)) + if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size, + share->base.default_rec_buff_size)) { _ma_check_print_error(param,"Not enough memory!"); goto err; } - sort_param[i].key_length=share->rec_reflength; for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END; keyseg++) @@ -2698,7 +3171,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif - if (skr != sort_info.filelength && !info->s->base.raid_type) + if (skr != sort_info.filelength) if (my_chsize(info->dfile,skr,0,MYF(0))) _ma_check_print_warning(param, "Can't change size of datafile, error: %d", @@ -2736,9 +3209,9 @@ err: my_close(new_file,MYF(0)); info->dfile=new_file= -1; if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, share->base.raid_chunks, - (param->testflag & T_BACKUP_DATA ? - MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || _ma_open_datafile(info,share,-1)) got_error=1; } @@ -2750,8 +3223,7 @@ err: if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); - VOID(my_raid_delete(param->temp_filename,share->base.raid_chunks, - MYF(MY_WME))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); if (info->dfile == new_file) info->dfile= -1; } @@ -2771,21 +3243,18 @@ err: VOID(end_io_cache(¶m->read_cache)); info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); if (!got_error && (param->testflag & T_UNPACK)) - { - share->state.header.options[0]&= (uchar) ~HA_OPTION_COMPRESS_RECORD; - share->pack.header_length=0; - } + restore_data_file_type(share); DBUG_RETURN(got_error); #endif /* THREAD */ } /* Read next record and return next key */ -static int sort_key_read(MARIA_SORT_PARAM *sort_param, void *key) +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key) { int error; - MARIA_SORT_INFO *sort_info=sort_param->sort_info; - MARIA_HA *info=sort_info->info; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + MARIA_HA *info= sort_info->info; DBUG_ENTER("sort_key_read"); if ((error=sort_get_next_record(sort_param))) @@ -2799,7 +3268,7 @@ static int sort_key_read(MARIA_SORT_PARAM *sort_param, void *key) } sort_param->real_key_length= (info->s->rec_reflength+ - _ma_make_key(info, sort_param->key, (uchar*) key, + _ma_make_key(info, sort_param->key, key, sort_param->record, sort_param->filepos)); #ifdef HAVE_purify bzero(key+sort_param->real_key_length, @@ -2808,7 +3277,8 @@ static int sort_key_read(MARIA_SORT_PARAM *sort_param, void *key) DBUG_RETURN(_ma_sort_write_record(sort_param)); } /* sort_key_read */ -static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) + +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key) { int error; MARIA_SORT_INFO *sort_info=sort_param->sort_info; @@ -2841,7 +3311,8 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, void *key) sort_param->real_key_length=(info->s->rec_reflength+ _ma_ft_make_key(info, sort_param->key, - key, wptr++, sort_param->filepos)); + key, wptr++, + sort_param->filepos)); #ifdef HAVE_purify if (sort_param->key_length > sort_param->real_key_length) bzero(key+sort_param->real_key_length, @@ -2881,6 +3352,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(1); switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; case STATIC_RECORD: for (;;) { @@ -2904,7 +3378,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (*sort_param->record) { if (param->calc_checksum) - param->glob_crc+= (info->checksum= + param->glob_crc+= (info->cur_row.checksum= _ma_static_checksum(info,sort_param->record)); DBUG_RETURN(0); } @@ -3088,8 +3562,11 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) sort_param->pos=block_info.filepos+block_info.block_len; if (share->base.blobs) { - if (!(to=_ma_alloc_rec_buff(info,block_info.rec_len, - &(sort_param->rec_buff)))) + if (_ma_alloc_buffer(&sort_param->rec_buff, + &sort_param->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + { if (param->max_record_length >= block_info.rec_len) { @@ -3107,8 +3584,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } } - else - to= sort_param->rec_buff; + to= sort_param->rec_buff; } if (left_length < block_info.data_len || ! block_info.data_len) { @@ -3158,7 +3634,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->read_cache.error < 0) DBUG_RETURN(1); if (info->s->calc_checksum) - info->checksum=_ma_checksum(info,sort_param->record); + info->cur_row.checksum= _ma_checksum(info,sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, @@ -3172,7 +3648,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } if (param->calc_checksum) - param->glob_crc+= info->checksum; + param->glob_crc+= info->cur_row.checksum; DBUG_RETURN(0); } if (!searching) @@ -3230,7 +3706,6 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) llstr(sort_param->pos,llbuff)); continue; } - info->checksum=_ma_checksum(info,sort_param->record); if (!sort_param->fix_datafile) { sort_param->filepos=sort_param->pos; @@ -3240,8 +3715,12 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos=block_info.filepos+ block_info.rec_len); info->packed_length=block_info.rec_len; - if (param->calc_checksum) - param->glob_crc+= info->checksum; + + { + info->cur_row.checksum= (*info->s->calc_checksum)(info, + sort_param->record); + param->glob_crc+= info->cur_row.checksum; + } DBUG_RETURN(0); } } @@ -3267,6 +3746,9 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) if (sort_param->fix_datafile) { switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, share->base.pack_reclength)) @@ -3276,7 +3758,6 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } sort_param->filepos+=share->base.pack_reclength; info->s->state.split++; - /* sort_info->param->glob_crc+=_ma_static_checksum(info, sort_param->record); */ break; case DYNAMIC_RECORD: if (! info->blobs) @@ -3298,10 +3779,9 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); } - info->checksum=_ma_checksum(info,sort_param->record); + info->cur_row.checksum= _ma_checksum(info,sort_param->record); reclength= _ma_rec_pack(info,from,sort_param->record); flag=0; - /* sort_info->param->glob_crc+=info->checksum; */ do { @@ -3322,7 +3802,6 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) sort_param->filepos+=block_length; info->s->state.split++; } while (reclength); - /* sort_info->param->glob_crc+=info->checksum; */ break; case COMPRESSED_RECORD: reclength=info->packed_length; @@ -3337,7 +3816,6 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) _ma_check_print_error(param,"%d when writing to datafile",my_errno); DBUG_RETURN(1); } - /* sort_info->param->glob_crc+=info->checksum; */ sort_param->filepos+=reclength+length; info->s->state.split++; break; @@ -3369,7 +3847,7 @@ static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, } /* sort_key_cmp */ -static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a) { uint diff_pos[2]; char llbuff[22],llbuff2[22]; @@ -3379,11 +3857,11 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) if (sort_info->key_block->inited) { - cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, diff_pos); if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) - ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) @@ -3391,7 +3869,7 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, sort_info->key_block->lastkey, - (uchar*)a); + a); } sort_param->unique[diff_pos[0]-1]++; } @@ -3400,17 +3878,17 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) cmp= -1; if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, - (uchar*)a); + a); } if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) { sort_info->dupp++; - sort_info->info->lastpos=get_record_for_key(sort_info->info, - sort_param->keyinfo, - (uchar*) a); + sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info, + sort_param->keyinfo, + a); _ma_check_print_warning(param, "Duplicate key for record at %10s against record at %10s", - llstr(sort_info->info->lastpos,llbuff), + llstr(sort_info->info->cur_row.lastpos, llbuff), llstr(get_record_for_key(sort_info->info, sort_param->keyinfo, sort_info->key_block-> @@ -3418,7 +3896,7 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) llbuff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; if (sort_info->param->testflag & T_VERBOSE) - _ma_print_key(stdout,sort_param->seg,(uchar*) a, USE_WHOLE_KEY); + _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY); return (sort_delete_record(sort_param)); } #ifndef DBUG_OFF @@ -3429,10 +3907,11 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const void *a) return(1); } #endif - return (sort_insert_key(sort_param,sort_info->key_block, - (uchar*) a, HA_OFFSET_ERROR)); + return (sort_insert_key(sort_param, sort_info->key_block, + a, HA_OFFSET_ERROR)); } /* sort_key_write */ + int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) { MARIA_SORT_INFO *sort_info=sort_param->sort_info; @@ -3441,24 +3920,24 @@ int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) uint val_off, val_len; int error; SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf; - uchar *from, *to; + byte *from, *to; val_len=share->ft2_keyinfo.keylength; get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey); - to=maria_ft_buf->lastkey+val_off; + to= maria_ft_buf->lastkey+val_off; if (maria_ft_buf->buf) { /* flushing first-level tree */ - error=sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, - HA_OFFSET_ERROR); + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); for (from=to+val_len; !error && from < maria_ft_buf->buf; from+= val_len) { memcpy(to, from, val_len); - error=sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, - HA_OFFSET_ERROR); + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); } return error; } @@ -3478,10 +3957,11 @@ int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) maria_ft_buf->lastkey,HA_OFFSET_ERROR); } -static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, const void *a) + +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a) { uint a_len, val_off, val_len, error; - uchar *p; MARIA_SORT_INFO *sort_info= sort_param->sort_info; SORT_FT_BUF *ft_buf= sort_info->ft_buf; SORT_KEY_BLOCKS *key_block= sort_info->key_block; @@ -3514,13 +3994,14 @@ static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, const void *a) if (ha_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0, 0)==0) + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) { + byte *p; if (!ft_buf->buf) /* store in second-level tree */ { ft_buf->count++; return sort_insert_key(sort_param,key_block, - ((uchar *)a)+a_len, HA_OFFSET_ERROR); + a + a_len, HA_OFFSET_ERROR); } /* storing the key in the buffer. */ @@ -3569,21 +4050,22 @@ word_init_ft_buf: /* get pointer to record from a key */ static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key) + const byte *key) { - return _ma_dpos(info,0,key + _ma_keylength(keyinfo,key)); + return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key)); } /* get_record_for_key */ /* Insert a key in sort-key-blocks */ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, - register SORT_KEY_BLOCKS *key_block, uchar *key, + register SORT_KEY_BLOCKS *key_block, + const byte *key, my_off_t prev_block) { uint a_length,t_length,nod_flag; my_off_t filepos,key_file_length; - uchar *anc_buff,*lastkey; + byte *anc_buff,*lastkey; MARIA_KEY_PARAM s_temp; MARIA_HA *info; MARIA_KEYDEF *keyinfo=sort_param->keyinfo; @@ -3591,7 +4073,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_insert_key"); - anc_buff=key_block->buff; + anc_buff= key_block->buff; info=sort_info->info; lastkey=key_block->lastkey; nod_flag= (key_block == sort_info->key_block ? 0 : @@ -3617,7 +4099,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, _ma_kpointer(info,key_block->end_pos,prev_block); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, - (uchar*) 0,lastkey,lastkey,key, + (byte*) 0,lastkey,lastkey,key, &s_temp); (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); a_length+=t_length; @@ -3625,14 +4107,14 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, key_block->end_pos+=t_length; if (a_length <= keyinfo->block_length) { - VOID(_ma_move_key(keyinfo,key_block->lastkey,key)); + VOID(_ma_move_key(keyinfo, key_block->lastkey, key)); key_block->last_length=a_length-t_length; DBUG_RETURN(0); } /* Fill block with end-zero and write filled block */ maria_putint(anc_buff,key_block->last_length,nod_flag); - bzero((byte*) anc_buff+key_block->last_length, + bzero(anc_buff+key_block->last_length, keyinfo->block_length- key_block->last_length); key_file_length=info->state->key_file_length; if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) @@ -3644,10 +4126,10 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff)) DBUG_RETURN(1); } - else if (my_pwrite(info->s->kfile,(byte*) anc_buff, + else if (my_pwrite(info->s->kfile,anc_buff, (uint) keyinfo->block_length,filepos, param->myf_rw)) DBUG_RETURN(1); - DBUG_DUMP("buff",(byte*) anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("buff",anc_buff,maria_getint(anc_buff)); /* Write separator-key to block in next level */ if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) @@ -3665,7 +4147,7 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) { uint i; int old_file,error; - uchar *key; + byte *key; MARIA_SORT_INFO *sort_info=sort_param->sort_info; HA_CHECK *param=sort_info->param; MARIA_HA *info=sort_info->info; @@ -3680,7 +4162,7 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) if (info->s->options & HA_OPTION_COMPRESS_RECORD) { _ma_check_print_error(param, - "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'mariachk --safe-recover' to fix it\n",stderr);; + "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'maria_chk --safe-recover' to fix it\n",stderr);; DBUG_RETURN(1); } @@ -3688,8 +4170,9 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) info->dfile=info->rec_cache.file; if (sort_info->current_key) { - key=info->lastkey+info->s->base.max_key_length; - if ((error=(*info->s->read_rnd)(info,sort_param->record,info->lastpos,0)) && + key= info->lastkey+info->s->base.max_key_length; + if ((error=(*info->s->read_record)(info,sort_param->record, + info->cur_row.lastpos)) && error != HA_ERR_RECORD_DELETED) { _ma_check_print_error(param,"Can't read record to be removed"); @@ -3699,10 +4182,13 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) for (i=0 ; i < sort_info->current_key ; i++) { - uint key_length= _ma_make_key(info,i,key,sort_param->record,info->lastpos); - if (_ma_ck_delete(info,i,key,key_length)) + uint key_length= _ma_make_key(info, i, key, sort_param->record, + info->cur_row.lastpos); + if (_ma_ck_delete(info, i, key, key_length)) { - _ma_check_print_error(param,"Can't delete key %d from record to be removed",i+1); + _ma_check_print_error(param, + "Can't delete key %d from record to be removed", + i+1); info->dfile=old_file; DBUG_RETURN(1); } @@ -3738,7 +4224,7 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) if (nod_flag) _ma_kpointer(info,key_block->end_pos,filepos); key_file_length=info->state->key_file_length; - bzero((byte*) key_block->buff+length, keyinfo->block_length-length); + bzero(key_block->buff+length, keyinfo->block_length-length); if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) DBUG_RETURN(1); @@ -3749,10 +4235,10 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) DFLT_INIT_HITS, key_block->buff)) DBUG_RETURN(1); } - else if (my_pwrite(info->s->kfile,(byte*) key_block->buff, + else if (my_pwrite(info->s->kfile,key_block->buff, (uint) keyinfo->block_length,filepos, myf_rw)) DBUG_RETURN(1); - DBUG_DUMP("buff",(byte*) key_block->buff,length); + DBUG_DUMP("buff",key_block->buff,length); nod_flag=1; } info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */ @@ -3768,9 +4254,9 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, SORT_KEY_BLOCKS *block; DBUG_ENTER("alloc_key_blocks"); - if (!(block=(SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+ - buffer_length+IO_SIZE)*blocks, - MYF(0)))) + if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+ + buffer_length+IO_SIZE)*blocks, + MYF(0)))) { _ma_check_print_error(param,"Not enough memory for sort-key-blocks"); return(0); @@ -3778,7 +4264,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, for (i=0 ; i < blocks ; i++) { block[i].inited=0; - block[i].buff=(uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i; + block[i].buff= (byte*) (block+blocks)+(buffer_length+IO_SIZE)*i; } DBUG_RETURN(block); } /* alloc_key_blocks */ @@ -3821,7 +4307,8 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) share= *(*org_info)->s; unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && (param->testflag & T_UNPACK); - if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF)*share.base.keys))) + if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) * + share.base.keys))) DBUG_RETURN(0); memcpy((byte*) keyinfo,(byte*) share.keyinfo, (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys)); @@ -3877,8 +4364,10 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) keyseg++; /* Skip end pointer */ } - /* Copy the unique definitions and change them to point at the new key - segments*/ + /* + Copy the unique definitions and change them to point at the new key + segments + */ memcpy((byte*) uniquedef,(byte*) share.uniqueinfo, (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques))); for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques; @@ -3894,7 +4383,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) (ulong) share.base.min_pack_length); else max_records=0; - unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && + unpack= (share.data_file_type == COMPRESSED_RECORD) && (param->testflag & T_UNPACK); share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD; @@ -3916,21 +4405,29 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) create_info.language = (param->language ? param->language : share.state.header.language); create_info.key_file_length= status_info.key_file_length; + create_info.org_data_file_type= ((enum data_file_type) + share.state.header.org_data_file_type); + /* Allow for creating an auto_increment key. This has an effect only if an auto_increment key exists in the original table. */ create_info.with_auto_increment= TRUE; - /* We don't have to handle symlinks here because we are using - HA_DONT_TOUCH_DATA */ - if (maria_create(filename, - share.base.keys - share.state.header.uniques, - keyinfo, share.base.fields, recdef, - share.state.header.uniques, uniquedef, - &create_info, - HA_DONT_TOUCH_DATA)) + create_info.null_bytes= share.base.null_bytes; + /* + We don't have to handle symlinks here because we are using + HA_DONT_TOUCH_DATA + */ + if (maria_create(filename, share.data_file_type, + share.base.keys - share.state.header.uniques, + keyinfo, share.base.fields, recdef, + share.state.header.uniques, uniquedef, + &create_info, + HA_DONT_TOUCH_DATA)) { - _ma_check_print_error(param,"Got error %d when trying to recreate indexfile",my_errno); + _ma_check_print_error(param, + "Got error %d when trying to recreate indexfile", + my_errno); goto end; } *org_info=maria_open(filename,O_RDWR, @@ -3939,8 +4436,9 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) HA_OPEN_ABORT_IF_LOCKED); if (!*org_info) { - _ma_check_print_error(param,"Got error %d when trying to open re-created indexfile", - my_errno); + _ma_check_print_error(param, + "Got error %d when trying to open re-created indexfile", + my_errno); goto end; } /* We are modifing */ @@ -3990,7 +4488,8 @@ int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) return 0; } - /* Update state and mariachk_time of indexfile */ + +/* Update state and maria_chk time of indexfile */ int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) { @@ -4303,12 +4802,7 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) { MARIA_SHARE tmp; - - if (share->options & HA_OPTION_PACK_RECORD) - sort_info->new_data_file_type = DYNAMIC_RECORD; - else - sort_info->new_data_file_type = STATIC_RECORD; - + sort_info->new_data_file_type= share->state.header.org_data_file_type; /* Set delete_function for sort_delete_record() */ memcpy((char*) &tmp, share, sizeof(*share)); tmp.options= ~HA_OPTION_COMPRESS_RECORD; @@ -4316,3 +4810,13 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) share->delete_record=tmp.delete_record; } } + +static void restore_data_file_type(MARIA_SHARE *share) +{ + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + share->state.header.data_file_type= + share->state.header.org_data_file_type; + share->data_file_type= share->state.header.data_file_type= + share->pack.header_length= 0; +} diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c index 054873706a4..1b0f683fe63 100644 --- a/storage/maria/ma_checksum.c +++ b/storage/maria/ma_checksum.c @@ -18,23 +18,27 @@ #include "maria_def.h" -ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf) +ha_checksum _ma_checksum(MARIA_HA *info, const byte *record) { uint i; ha_checksum crc=0; MARIA_COLUMNDEF *rec=info->s->rec; - for (i=info->s->base.fields ; i-- ; buf+=(rec++)->length) + if (info->s->base.null_bytes) + crc= my_checksum(crc, record, info->s->base.null_bytes); + + for (i=info->s->base.fields ; i-- ; ) { - const byte *pos; + const byte *pos= record + rec->offset; ulong length; + switch (rec->type) { case FIELD_BLOB: { length= _ma_calc_blob_length(rec->length- maria_portable_sizeof_char_ptr, - buf); - memcpy((char*) &pos, buf+rec->length- maria_portable_sizeof_char_ptr, + pos); + memcpy((char*) &pos, pos+rec->length- maria_portable_sizeof_char_ptr, sizeof(char*)); break; } @@ -42,18 +46,17 @@ ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf) { uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1); if (pack_length == 1) - length= (ulong) *(uchar*) buf; + length= (ulong) *(uchar*) pos; else - length= uint2korr(buf); - pos= buf+pack_length; + length= uint2korr(pos); + pos+= pack_length; break; } default: - length=rec->length; - pos=buf; + length= rec->length; break; } - crc=my_checksum(crc, pos ? pos : "", length); + crc= my_checksum(crc, pos ? pos : "", length); } return crc; } diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 5b940eaf4c3..e5d2985ce47 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -28,7 +28,7 @@ int maria_close(register MARIA_HA *info) int error=0,flag; MARIA_SHARE *share=info->s; DBUG_ENTER("maria_close"); - DBUG_PRINT("enter",("base: %lx reopen: %u locks: %u", + DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", info,(uint) share->reopen, (uint) share->tot_locks)); pthread_mutex_lock(&THR_LOCK_maria); @@ -60,16 +60,22 @@ int maria_close(register MARIA_HA *info) maria_open_list=list_delete(maria_open_list,&info->open_list); pthread_mutex_unlock(&share->intern_lock); - my_free(_ma_get_rec_buff_ptr(info, info->rec_buff), MYF(MY_ALLOW_ZERO_PTR)); + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + (share->end)(info); + + if (info->s->data_file_type == BLOCK_RECORD) + info->dfile= -1; /* Closed in ma_end_once_block_row */ if (flag) { - if (share->kfile >= 0 && - flush_key_blocks(share->key_cache, share->kfile, - share->temporary ? FLUSH_IGNORE_CHANGED : - FLUSH_RELEASE)) - error=my_errno; if (share->kfile >= 0) { + if ((*share->once_end)(share)) + error= my_errno; + if (flush_key_blocks(share->key_cache, share->kfile, + share->temporary ? FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + error= my_errno; + /* If we are crashed, we can safely flush the current state as it will not change the crashed state. @@ -78,18 +84,13 @@ int maria_close(register MARIA_HA *info) */ if (share->mode != O_RDONLY && maria_is_crashed(info)) _ma_state_info_write(share->kfile, &share->state, 1); - if (my_close(share->kfile,MYF(0))) - error = my_errno; + if (my_close(share->kfile, MYF(0))) + error= my_errno; } #ifdef HAVE_MMAP if (share->file_map) _ma_unmap_file(info); #endif - if (share->decode_trees) - { - my_free((gptr) share->decode_trees,MYF(0)); - my_free((gptr) share->decode_tables,MYF(0)); - } #ifdef THREAD thr_lock_delete(&share->lock); VOID(pthread_mutex_destroy(&share->intern_lock)); diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index b9fb4eb0d5b..82585e78ba7 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -18,6 +18,7 @@ #include "ma_ftdefs.h" #include "ma_sp_defs.h" +#include "ma_blockrec.h" #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -28,41 +29,48 @@ #endif #include +static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b); + /* Old options is used when recreating database, from maria_chk */ -int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, - uint columns, MARIA_COLUMNDEF *recinfo, - uint uniques, MARIA_UNIQUEDEF *uniquedefs, - MARIA_CREATE_INFO *ci,uint flags) +int maria_create(const char *name, enum data_file_type record_type, + uint keys,MARIA_KEYDEF *keydefs, + uint columns, MARIA_COLUMNDEF *recinfo, + uint uniques, MARIA_UNIQUEDEF *uniquedefs, + MARIA_CREATE_INFO *ci,uint flags) { register uint i,j; File dfile,file; int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; myf create_flag; - uint fields,length,max_key_length,packed,pointer,real_length_diff, + uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff, key_length,info_length,key_segs,options,min_key_length_skip, base_pos,long_varchar_count,varchar_length, - max_key_block_length,unique_key_parts,fulltext_keys,offset; - uint aligned_key_start, block_length; + unique_key_parts,fulltext_keys,offset; + uint max_field_lengths, extra_header_size; ulong reclength, real_reclength,min_pack_length; char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; ulonglong tot_length,max_rows, tmp; enum en_fieldtype type; + enum data_file_type org_record_type= record_type; MARIA_SHARE share; MARIA_KEYDEF *keydef,tmp_keydef; MARIA_UNIQUEDEF *uniquedef; HA_KEYSEG *keyseg,tmp_keyseg; - MARIA_COLUMNDEF *rec; + MARIA_COLUMNDEF *rec, *rec_end; ulong *rec_per_key_part; - my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY]; MARIA_CREATE_INFO tmp_create_info; DBUG_ENTER("maria_create"); DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", keys, columns, uniques, flags)); + LINT_INIT(dfile); + LINT_INIT(file); + if (!ci) { bzero((char*) &tmp_create_info,sizeof(tmp_create_info)); @@ -73,22 +81,24 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, { DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION); } - LINT_INIT(dfile); - LINT_INIT(file); errpos=0; options=0; bzero((byte*) &share,sizeof(share)); if (flags & HA_DONT_TOUCH_DATA) { + org_record_type= ci->org_data_file_type; if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD)) options=ci->old_options & (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD | HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); else + { + /* Uncompressing rows */ options=ci->old_options & (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + } } if (ci->reloc_rows > ci->max_rows) @@ -101,59 +111,90 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, /* Start by checking fields and field-types used */ - reclength=varchar_length=long_varchar_count=packed= - min_pack_length=pack_reclength=0; - for (rec=recinfo, fields=0 ; - fields != columns ; - rec++,fields++) + varchar_length=long_varchar_count=packed= + pack_reclength= max_field_lengths= 0; + reclength= min_pack_length= ci->null_bytes; + + for (rec= recinfo, rec_end= rec + columns ; rec != rec_end ; rec++) { - reclength+=rec->length; - if ((type=(enum en_fieldtype) rec->type) != FIELD_NORMAL && - type != FIELD_CHECK) + /* Fill in not used struct parts */ + rec->offset= reclength; + rec->empty_pos= 0; + rec->empty_bit= 0; + rec->fill_length= rec->length; + + reclength+= rec->length; + type= rec->type; + if (type == FIELD_SKIP_PRESPACE && record_type == BLOCK_RECORD) + type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */ + + if (type != FIELD_NORMAL && type != FIELD_CHECK) { - packed++; + rec->empty_pos= packed/8; + rec->empty_bit= (1 << (packed & 7)); if (type == FIELD_BLOB) { + packed++; share.base.blobs++; if (pack_reclength != INT_MAX32) { if (rec->length == 4+maria_portable_sizeof_char_ptr) pack_reclength= INT_MAX32; else - pack_reclength+=(1 << ((rec->length-maria_portable_sizeof_char_ptr)*8)); /* Max blob length */ + { + /* Add max possible blob length */ + pack_reclength+= (1 << ((rec->length- + maria_portable_sizeof_char_ptr)*8)); + } } + max_field_lengths+= (rec->length - maria_portable_sizeof_char_ptr); } else if (type == FIELD_SKIP_PRESPACE || type == FIELD_SKIP_ENDSPACE) { - if (pack_reclength != INT_MAX32) - pack_reclength+= rec->length > 255 ? 2 : 1; + max_field_lengths+= rec->length > 255 ? 2 : 1; min_pack_length++; + packed++; } else if (type == FIELD_VARCHAR) { varchar_length+= rec->length-1; /* Used for min_pack_length */ - packed--; pack_reclength++; min_pack_length++; + max_field_lengths++; + packed++; /* We must test for 257 as length includes pack-length */ if (test(rec->length >= 257)) { long_varchar_count++; - pack_reclength+= 2; /* May be packed on 3 bytes */ + max_field_lengths++; } } else if (type != FIELD_SKIP_ZERO) { min_pack_length+=rec->length; - packed--; /* Not a pack record type */ + rec->empty_pos= 0; + rec->empty_bit= 0; } + else + packed++; } else /* FIELD_NORMAL */ + { min_pack_length+=rec->length; + if (!rec->null_bit) + { + share.base.fixed_not_null_fields++; + share.base.fixed_not_null_fields_length+= rec->length; + } + } } if ((packed & 7) == 1) - { /* Bad packing, try to remove a zero-field */ + { + /* + Not optimal packing, try to remove a 1 byte length zero-field as + this will get same record length, but smaller pack overhead + */ while (rec != recinfo) { rec--; @@ -166,14 +207,27 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } } } + share.base.null_bytes= ci->null_bytes; + share.base.original_null_bytes= ci->null_bytes; + share.base.transactional= ci->transactional; + share.base.max_field_lengths= max_field_lengths; + share.base.field_offsets= 0; /* for future */ + + if (pack_reclength != INT_MAX32) + pack_reclength+= max_field_lengths + long_varchar_count; - if (packed || (flags & HA_PACK_RECORD)) - options|=HA_OPTION_PACK_RECORD; /* Must use packed records */ - /* We can't use checksum with static length rows */ - if (!(options & HA_OPTION_PACK_RECORD)) + if (packed && record_type == STATIC_RECORD) + record_type= BLOCK_RECORD; + if (record_type == DYNAMIC_RECORD) + options|= HA_OPTION_PACK_RECORD; /* Must use packed records */ + + if (record_type == STATIC_RECORD) + { + /* We can't use checksum with static length rows */ + flags&= ~HA_CREATE_CHECKSUM; options&= ~HA_OPTION_CHECKSUM; - if (!(options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) min_pack_length+= varchar_length; + } if (flags & HA_CREATE_TMP_TABLE) { options|= HA_OPTION_TMP_TABLE; @@ -183,18 +237,24 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, { options|= HA_OPTION_CHECKSUM; min_pack_length++; + pack_reclength++; } if (flags & HA_CREATE_DELAY_KEY_WRITE) options|= HA_OPTION_DELAY_KEY_WRITE; if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) options|= HA_OPTION_RELIES_ON_SQL_LAYER; - packed=(packed+7)/8; + pack_bytes= (packed + 7) / 8; if (pack_reclength != INT_MAX32) - pack_reclength+= reclength+packed + + pack_reclength+= reclength+pack_bytes + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); - min_pack_length+=packed; - + min_pack_length+= pack_bytes; + /* Calculate min possible row length for rows-in-block */ + extra_header_size= MAX_FIXED_HEADER_SIZE; + if (ci->transactional) + extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE; + share.base.min_row_length= (extra_header_size + share.base.null_bytes + + pack_bytes); if (!ci->data_file_length && ci->max_rows) { if (pack_reclength == INT_MAX32 || @@ -204,20 +264,57 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength; } else if (!ci->max_rows) - ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length + - ((options & HA_OPTION_PACK_RECORD) ? - 3 : 0))); + { + if (record_type == BLOCK_RECORD) + { + uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) / + (min_pack_length + extra_header_size + + DIR_ENTRY_SIZE)); + ulonglong data_file_length= ci->data_file_length; + if (data_file_length) + data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) * + 8)) -1)); + if (rows_per_page > 0) + { + set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE); + ci->max_rows= ci->data_file_length / maria_block_size * rows_per_page; + } + else + ci->max_rows= ci->data_file_length / (min_pack_length + + extra_header_size + + DIR_ENTRY_SIZE); + } + else + ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length + + ((options & + HA_OPTION_PACK_RECORD) ? + 3 : 0))); + } + max_rows= (ulonglong) ci->max_rows; + if (record_type == BLOCK_RECORD) + { + /* The + 1 is for record position withing page */ + pointer= maria_get_pointer_length((ci->data_file_length / + maria_block_size), 3) + 1; + set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE); - if (options & (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD)) - pointer=maria_get_pointer_length(ci->data_file_length,maria_data_pointer_size); + if (!max_rows) + max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) / + min_pack_length); + } else - pointer=maria_get_pointer_length(ci->max_rows,maria_data_pointer_size); - if (!(max_rows=(ulonglong) ci->max_rows)) - max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length); - + { + if (record_type != STATIC_RECORD) + pointer= maria_get_pointer_length(ci->data_file_length, + maria_data_pointer_size); + else + pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size); + if (!max_rows) + max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length); + } real_reclength=reclength; - if (!(options & (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD))) + if (record_type == STATIC_RECORD) { if (reclength <= pointer) reclength=pointer+1; /* reserve place for delete link */ @@ -227,19 +324,14 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, max_key_length=0; tot_length=0 ; key_segs=0; fulltext_keys=0; - max_key_block_length=0; share.state.rec_per_key_part=rec_per_key_part; share.state.key_root=key_root; - share.state.key_del=key_del; + share.state.key_del= HA_OFFSET_ERROR; if (uniques) - { - max_key_block_length= maria_block_size; - max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer; - } + max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer; for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++) { - share.state.key_root[i]= HA_OFFSET_ERROR; min_key_length_skip=length=real_length_diff=0; key_length=pointer; @@ -253,11 +345,11 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, if (flags & HA_DONT_TOUCH_DATA) { /* - called by mariachk - i.e. table structure was taken from - MYI file and SPATIAL key *does have* additional sp_segs keysegs. - keydef->seg here points right at the GEOMETRY segment, - so we only need to decrease keydef->keysegs. - (see maria_recreate_table() in _ma_check.c) + Called by maria_chk - i.e. table structure was taken from + MYI file and SPATIAL key *does have* additional sp_segs keysegs. + keydef->seg here points right at the GEOMETRY segment, + so we only need to decrease keydef->keysegs. + (see maria_recreate_table() in _ma_check.c) */ keydef->keysegs-=sp_segs-1; } @@ -431,35 +523,22 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, key_segs) share.state.rec_per_key_part[key_segs-1]=1L; length+=key_length; - /* Get block length for key, if defined by user */ - block_length= (keydef->block_length ? - my_round_up_to_next_power(keydef->block_length) : - maria_block_size); - block_length= max(block_length, MARIA_MIN_KEY_BLOCK_LENGTH); - block_length= min(block_length, MARIA_MAX_KEY_BLOCK_LENGTH); - - keydef->block_length= MARIA_BLOCK_SIZE(length-real_length_diff, - pointer,MARIA_MAX_KEYPTR_SIZE, - block_length); - if (keydef->block_length > MARIA_MAX_KEY_BLOCK_LENGTH || - length >= HA_MAX_KEY_BUFF) + if (length >= min(HA_MAX_KEY_BUFF, MARIA_MAX_KEY_LENGTH)) { my_errno=HA_WRONG_CREATE_OPTION; goto err; } - set_if_bigger(max_key_block_length,keydef->block_length); + keydef->block_length= maria_block_size; keydef->keylength= (uint16) key_length; keydef->minlength= (uint16) (length-min_key_length_skip); keydef->maxlength= (uint16) length; if (length > max_key_length) max_key_length= length; - tot_length+= (max_rows/(ulong) (((uint) keydef->block_length-5)/ - (length*2)))* - (ulong) keydef->block_length; + tot_length+= ((max_rows/(ulong) (((uint) maria_block_size-5)/ + (length*2))) * + maria_block_size); } - for (i=max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH ; i-- ; ) - key_del[i]=HA_OFFSET_ERROR; unique_key_parts=0; offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; @@ -476,14 +555,12 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, key_segs+=uniques; /* Each unique has 1 key seg */ base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE + - max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH* - MARIA_STATE_KEYBLOCK_SIZE+ - key_segs*MARIA_STATE_KEYSEG_SIZE); - info_length=base_pos+(uint) (MARIA_BASE_INFO_SIZE+ - keys * MARIA_KEYDEF_SIZE+ - uniques * MARIA_UNIQUEDEF_SIZE + - (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ - columns*MARIA_COLUMNDEF_SIZE); + key_segs * MARIA_STATE_KEYSEG_SIZE); + info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+ + keys * MARIA_KEYDEF_SIZE+ + uniques * MARIA_UNIQUEDEF_SIZE + + (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ + columns*MARIA_COLUMNDEF_SIZE); DBUG_PRINT("info", ("info_length: %u", info_length)); /* There are only 16 bits for the total header length. */ @@ -505,11 +582,13 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); mi_int2store(share.state.header.base_pos,base_pos); + share.state.header.data_file_type= record_type; + share.state.header.org_data_file_type= org_record_type; share.state.header.language= (ci->language ? ci->language : default_charset_info->number); - share.state.header.max_block_size_index= max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH; share.state.dellink = HA_OFFSET_ERROR; + share.state.first_bitmap_with_space= 0; share.state.process= (ulong) getpid(); share.state.unique= (ulong) 0; share.state.update_count=(ulong) 0; @@ -518,9 +597,11 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, share.state.auto_increment=ci->auto_increment; share.options=options; share.base.rec_reflength=pointer; + share.base.block_size= maria_block_size; + /* Get estimate for index file length (this may be wrong for FT keys) */ - tmp= (tot_length + max_key_block_length * keys * - MARIA_INDEX_BLOCK_MARGIN) / MARIA_MIN_KEY_BLOCK_LENGTH; + tmp= (tot_length + maria_block_size * keys * + MARIA_INDEX_BLOCK_MARGIN) / maria_block_size; /* use maximum of key_file_length we calculated and key_file_length value we got from MYI file header (see also mariapack.c:save_state) @@ -534,13 +615,10 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, mi_int2store(share.state.header.unique_key_parts,unique_key_parts); maria_set_all_keys_active(share.state.key_map, keys); - aligned_key_start= my_round_up_to_next_power(max_key_block_length ? - max_key_block_length : - maria_block_size); share.base.keystart = share.state.state.key_file_length= - MY_ALIGN(info_length, aligned_key_start); - share.base.max_key_block_length=max_key_block_length; + MY_ALIGN(info_length, maria_block_size); + share.base.max_key_block_length= maria_block_size; share.base.max_key_length=ALIGN_SIZE(max_key_length+4); share.base.records=ci->max_rows; share.base.reloc= ci->reloc_rows; @@ -548,9 +626,9 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); share.base.max_pack_length=pack_reclength; share.base.min_pack_length=min_pack_length; - share.base.pack_bits=packed; - share.base.fields=fields; - share.base.pack_fields=packed; + share.base.pack_bytes= pack_bytes; + share.base.fields= columns; + share.base.pack_fields= packed; #ifdef USE_RAID share.base.raid_type=ci->raid_type; share.base.raid_chunks=ci->raid_chunks; @@ -559,7 +637,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, /* max_data_file_length and max_key_file_length are recalculated on open */ if (options & HA_OPTION_TMP_TABLE) - share.base.max_data_file_length=(my_off_t) ci->data_file_length; + share.base.max_data_file_length= (my_off_t) ci->data_file_length; share.base.min_block_length= (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH && @@ -632,72 +710,63 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, if (!(flags & HA_DONT_TOUCH_DATA)) { -#ifdef USE_RAID - if (share.base.raid_type) + if (ci->data_file_name) { - (void) fn_format(filename, name, "", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - if ((dfile=my_raid_create(filename, 0, create_mode, - share.base.raid_type, - share.base.raid_chunks, - share.base.raid_chunksize, - MYF(MY_WME | MY_RAID))) < 0) - goto err; - } - else -#endif - { - if (ci->data_file_name) - { - char *dext= strrchr(ci->data_file_name, '.'); - int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); - if (options & HA_OPTION_TMP_TABLE) - { - char *path; - /* chop off the table name, tempory tables use generated name */ - if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) - *path= '\0'; - fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, - MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); - } - else - { - fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | - (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); - } - fn_format(linkname, name, "",MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - linkname_ptr=linkname; - create_flag=0; + if (options & HA_OPTION_TMP_TABLE) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); } else { - fn_format(filename,name,"", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - linkname_ptr=0; - create_flag=MY_DELETE_OLD; + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); } - if ((dfile= - my_create_with_symlink(linkname_ptr, filename, 0, create_mode, - MYF(MY_WME | create_flag))) < 0) - goto err; + fn_format(linkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=linkname; + create_flag=0; } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=0; + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag))) < 0) + goto err; errpos=3; + + if (record_type == BLOCK_RECORD) + { + /* Write one bitmap page */ + char buff[IO_SIZE]; + uint i; + bzero(buff, sizeof(buff)); + for (i= 0 ; i < maria_block_size ; i+= IO_SIZE) + if (my_write(dfile, (byte*) buff, sizeof(buff), MYF(MY_NABP))) + goto err; + share.state.state.data_file_length= maria_block_size; + } } DBUG_PRINT("info", ("write state info and base info")); if (_ma_state_info_write(file, &share.state, 2) || _ma_base_info_write(file, &share.base)) goto err; -#ifndef DBUG_OFF - if ((uint) my_tell(file,MYF(0)) != base_pos+ MARIA_BASE_INFO_SIZE) - { - uint pos=(uint) my_tell(file,MYF(0)); - DBUG_PRINT("warning",("base_length: %d != used_length: %d", - base_pos+ MARIA_BASE_INFO_SIZE, pos)); - } -#endif + DBUG_PRINT("info", ("base_pos: %d base_info_size: %d", + base_pos, MARIA_BASE_INFO_SIZE)); + DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE); /* Write key and keyseg definitions */ DBUG_PRINT("info", ("write key and keyseg definitions")); @@ -738,7 +807,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, { tmp_keydef.keysegs=1; tmp_keydef.flag= HA_UNIQUE_CHECK; - tmp_keydef.block_length= (uint16)maria_block_size; + tmp_keydef.block_length= (uint16) maria_block_size; tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer; tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength; tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE; @@ -776,6 +845,7 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } break; default: + DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0); break; } if (_ma_keyseg_write(file, keyseg)) @@ -783,9 +853,27 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, } } DBUG_PRINT("info", ("write field definitions")); - for (i=0 ; i < share.base.fields ; i++) - if (_ma_recinfo_write(file, &recinfo[i])) + if (record_type == BLOCK_RECORD) + { + /* Store columns in a more efficent order */ + MARIA_COLUMNDEF **tmp, **pos; + if (!(tmp= (MARIA_COLUMNDEF**) my_malloc(share.base.fields * + sizeof(MARIA_COLUMNDEF*), + MYF(MY_WME)))) goto err; + for (rec= recinfo, pos= tmp ; rec != rec_end ; rec++, pos++) + *pos= rec; + qsort(tmp, share.base.fields, sizeof(*tmp), (qsort_cmp) compare_columns); + for (i=0 ; i < share.base.fields ; i++) + if (_ma_recinfo_write(file, tmp[i])) + goto err; + } + else + { + for (i=0 ; i < share.base.fields ; i++) + if (_ma_recinfo_write(file, &recinfo[i])) + goto err; + } #ifndef DBUG_OFF if ((uint) my_tell(file,MYF(0)) != info_length) @@ -797,7 +885,8 @@ int maria_create(const char *name,uint keys,MARIA_KEYDEF *keydefs, #endif /* Enlarge files */ - DBUG_PRINT("info", ("enlarge to keystart: %lu", (ulong) share.base.keystart)); + DBUG_PRINT("info", ("enlarge to keystart: %lu", + (ulong) share.base.keystart)); if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) goto err; @@ -826,7 +915,6 @@ err: VOID(my_close(dfile,MYF(0))); /* fall through */ case 2: - /* QQ: Tõnu should add a call to my_raid_delete() here */ if (! (flags & HA_DONT_TOUCH_DATA)) my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT), @@ -868,3 +956,59 @@ uint maria_get_pointer_length(ulonglong file_length, uint def) } return def; } + + +/* + Sort columns for records-in-block + + IMPLEMENTATION + Sort columns in following order: + + Fixed size, not null columns + Fixed length, null fields + Variable length fields (CHAR, VARCHAR) + Blobs + + For same kind of fields, keep fields in original order +*/ + +static inline int sign(longlong a) +{ + return a < 0 ? -1 : (a > 0 ? 1 : 0); +} + + +int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) +{ + MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr; + enum en_fieldtype a_type, b_type; + + a_type= (a->type == FIELD_NORMAL || a->type == FIELD_CHECK ? + FIELD_NORMAL : a->type); + b_type= (b->type == FIELD_NORMAL || b->type == FIELD_CHECK ? + FIELD_NORMAL : b->type); + + if (a_type == FIELD_NORMAL && !a->null_bit) + { + if (b_type != FIELD_NORMAL || b->null_bit) + return -1; + return sign((long) (a->offset - b->offset)); + } + if (b_type == FIELD_NORMAL && !b->null_bit) + return 1; + if (a_type == b_type) + return sign((long) (a->offset - b->offset)); + if (a_type == FIELD_NORMAL) + return -1; + if (b_type == FIELD_NORMAL) + return 1; + if (a_type == FIELD_BLOB) + return 1; + if (b_type == FIELD_BLOB) + return -1; + return sign((long) (a->offset - b->offset)); +} + + + + diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c index 7f2bff85047..596bf18bfe5 100644 --- a/storage/maria/ma_dbug.c +++ b/storage/maria/ma_dbug.c @@ -21,15 +21,15 @@ /* Print a key in user understandable format */ void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg, - const uchar *key, uint length) + const byte *key, uint length) { int flag; short int s_1; long int l_1; float f_1; double d_1; - const uchar *end; - const uchar *key_end=key+length; + const byte *end; + const byte *key_end= key + length; VOID(fputs("Key: \"",stream)); flag=0; diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index d1ad9edbed5..2669585202a 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -20,26 +20,25 @@ #include "ma_rt_index.h" static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag, - uchar *key,uint key_length,my_off_t page,uchar *anc_buff); -static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key,uchar *anc_buff, - my_off_t leaf_page,uchar *leaf_buff,uchar *keypos, - my_off_t next_block,uchar *ret_key); -static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *anc_buff, - my_off_t leaf_page,uchar *leaf_buff,uchar *keypos); -static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,uchar *keypos, - uchar *lastkey,uchar *page_end, + byte *key,uint key_length,my_off_t page,byte *anc_buff); +static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos, + my_off_t next_block,byte *ret_key); +static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos); +static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,byte *keypos, + byte *lastkey,byte *page_end, my_off_t *next_block); static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, my_off_t *root); + byte *key, uint key_length, my_off_t *root); int maria_delete(MARIA_HA *info,const byte *record) { uint i; - uchar *old_key; + byte *old_key; int save_errno; char lastpos[8]; - MARIA_SHARE *share=info->s; DBUG_ENTER("maria_delete"); @@ -61,17 +60,15 @@ int maria_delete(MARIA_HA *info,const byte *record) } if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); - if (info->s->calc_checksum) - info->checksum=(*info->s->calc_checksum)(info,record); if ((*share->compare_record)(info,record)) goto err; /* Error on read-check */ if (_ma_mark_file_changed(info)) goto err; - /* Remove all keys from the .ISAM file */ + /* Remove all keys from the index file */ - old_key=info->lastkey2; + old_key= info->lastkey2; for (i=0 ; i < share->base.keys ; i++ ) { if (maria_is_key_active(info->s->state.key_map, i)) @@ -79,13 +76,13 @@ int maria_delete(MARIA_HA *info,const byte *record) info->s->keyinfo[i].version++; if (info->s->keyinfo[i].flag & HA_FULLTEXT ) { - if (_ma_ft_del(info,i,(char*) old_key,record,info->lastpos)) + if (_ma_ft_del(info,i,(char*) old_key,record,info->cur_row.lastpos)) goto err; } else { if (info->s->keyinfo[i].ck_delete(info,i,old_key, - _ma_make_key(info,i,old_key,record,info->lastpos))) + _ma_make_key(info,i,old_key,record,info->cur_row.lastpos))) goto err; } /* The above changed info->lastkey2. Inform maria_rnext_same(). */ @@ -95,12 +92,21 @@ int maria_delete(MARIA_HA *info,const byte *record) if ((*share->delete_record)(info)) goto err; /* Remove record from database */ - info->state->checksum-=info->checksum; + + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + if (info->s->calc_checksum) + { + info->cur_row.checksum= (*info->s->calc_checksum)(info,record); + info->state->checksum-= info->cur_row.checksum; + } info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED; info->state->records--; - mi_sizestore(lastpos,info->lastpos); + mi_sizestore(lastpos, info->cur_row.lastpos); VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) @@ -113,7 +119,7 @@ int maria_delete(MARIA_HA *info,const byte *record) err: save_errno=my_errno; - mi_sizestore(lastpos,info->lastpos); + mi_sizestore(lastpos, info->cur_row.lastpos); if (save_errno != HA_ERR_RECORD_CHANGED) { maria_print_error(info->s, HA_ERR_CRASHED); @@ -135,7 +141,7 @@ err: /* Remove a key from the btree index */ -int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key, +int _ma_ck_delete(register MARIA_HA *info, uint keynr, byte *key, uint key_length) { return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length, @@ -144,12 +150,12 @@ int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key, static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, my_off_t *root) + byte *key, uint key_length, my_off_t *root) { int error; uint nod_flag; my_off_t old_root; - uchar *root_buff; + byte *root_buff; DBUG_ENTER("_ma_ck_real_delete"); if ((old_root=*root) == HA_OFFSET_ERROR) @@ -157,7 +163,7 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, maria_print_error(info->s, HA_ERR_CRASHED); DBUG_RETURN(my_errno=HA_ERR_CRASHED); } - if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + if (!(root_buff= (byte*) my_alloca((uint) keyinfo->block_length+ HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); @@ -212,17 +218,17 @@ err: */ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uint comp_flag, uchar *key, uint key_length, - my_off_t page, uchar *anc_buff) + uint comp_flag, byte *key, uint key_length, + my_off_t page, byte *anc_buff) { int flag,ret_value,save_flag; uint length,nod_flag,search_key_length; my_bool last_key; - uchar *leaf_buff,*keypos; + byte *leaf_buff,*keypos; my_off_t leaf_page,next_block; - uchar lastkey[HA_MAX_KEY_BUFF]; + byte lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("d_search"); - DBUG_DUMP("page",(byte*) anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("page",anc_buff,maria_getint(anc_buff)); search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length, @@ -264,7 +270,7 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, /* popular word. two-level tree. going down */ uint tmp_key_length; my_off_t root; - uchar *kpos=keypos; + byte *kpos=keypos; if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey))) { @@ -304,8 +310,8 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (nod_flag) { leaf_page= _ma_kpos(nod_flag,keypos); - if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - HA_MAX_KEY_BUFF*2))) + if (!(leaf_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); my_errno=ENOMEM; @@ -366,7 +372,7 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) goto err; ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey, - (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0); + (byte*) 0,(byte*) 0,(my_off_t) 0,(my_bool) 0); } } if (ret_value == 0 && maria_getint(anc_buff) > keyinfo->block_length) @@ -378,14 +384,14 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff); else { - DBUG_DUMP("page",(byte*) anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("page",anc_buff,maria_getint(anc_buff)); } - my_afree((byte*) leaf_buff); + my_afree(leaf_buff); DBUG_PRINT("exit",("Return: %d",ret_value)); DBUG_RETURN(ret_value); err: - my_afree((byte*) leaf_buff); + my_afree(leaf_buff); DBUG_PRINT("exit",("Error: %d",my_errno)); DBUG_RETURN (-1); } /* d_search */ @@ -393,24 +399,25 @@ err: /* Remove a key that has a page-reference */ -static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *key, - uchar *anc_buff, my_off_t leaf_page, uchar *leaf_buff, - uchar *keypos, /* Pos to where deleted key was */ +static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *anc_buff, my_off_t leaf_page, + byte *leaf_buff, + byte *keypos, /* Pos to where deleted key was */ my_off_t next_block, - uchar *ret_key) /* key before keypos in anc_buff */ + byte *ret_key) /* key before keypos in anc_buff */ { int ret_value,length; uint a_length,nod_flag,tmp; my_off_t next_page; - uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + byte keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; MARIA_SHARE *share=info->s; MARIA_KEY_PARAM s_temp; DBUG_ENTER("del"); DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", leaf_page, (ulong) keypos)); - DBUG_DUMP("leaf_buff",(byte*) leaf_buff,maria_getint(leaf_buff)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff)); - endpos=leaf_buff+maria_getint(leaf_buff); + endpos= leaf_buff+ maria_getint(leaf_buff); if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, &tmp))) DBUG_RETURN(-1); @@ -418,14 +425,14 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *k if ((nod_flag=_ma_test_if_nod(leaf_buff))) { next_page= _ma_kpos(nod_flag,endpos); - if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + if (!(next_buff= (byte*) my_alloca((uint) keyinfo->block_length+ HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) ret_value= -1; else { - DBUG_DUMP("next_page",(byte*) next_buff,maria_getint(next_buff)); + DBUG_DUMP("next_page",next_buff,maria_getint(next_buff)); if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff, keypos,next_block,ret_key)) >0) { @@ -446,13 +453,13 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *k &tmp)) goto err; ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff, - (uchar*) 0,(uchar*) 0,(my_off_t) 0,0); + (byte*) 0,(byte*) 0,(my_off_t) 0,0); } } if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) goto err; } - my_afree((byte*) next_buff); + my_afree(next_buff); DBUG_RETURN(ret_value); } @@ -472,11 +479,11 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *k prev_key=(keypos == anc_buff+2+share->base.key_reflength ? 0 : ret_key); length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength, - keypos == endpos ? (uchar*) 0 : keypos, + keypos == endpos ? (byte*) 0 : keypos, prev_key, prev_key, keybuff,&s_temp); if (length > 0) - bmove_upp((byte*) endpos+length,(byte*) endpos,(uint) (endpos-keypos)); + bmove_upp(endpos+length,endpos,(uint) (endpos-keypos)); else bmove(keypos,keypos-length, (int) (endpos-keypos)+length); (*keyinfo->store_key)(keyinfo,keypos,&s_temp); @@ -497,28 +504,28 @@ err: /* Balances adjacent pages if underflow occours */ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *anc_buff, + byte *anc_buff, my_off_t leaf_page,/* Ancestor page and underflow page */ - uchar *leaf_buff, - uchar *keypos) /* Position to pos after key */ + byte *leaf_buff, + byte *keypos) /* Position to pos after key */ { int t_length; uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, key_reflength,key_length; my_off_t next_page; - uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF], - *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key, - *after_key; + byte anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF]; + byte *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key; + byte *after_key; MARIA_KEY_PARAM s_temp; MARIA_SHARE *share=info->s; DBUG_ENTER("underflow"); DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page, (ulong) keypos)); - DBUG_DUMP("anc_buff",(byte*) anc_buff,maria_getint(anc_buff)); - DBUG_DUMP("leaf_buff",(byte*) leaf_buff,maria_getint(leaf_buff)); + DBUG_DUMP("anc_buff",anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff)); buff=info->buff; - info->buff_used=1; + info->keybuff_used=1; next_keypos=keypos; nod_flag=_ma_test_if_nod(leaf_buff); p_length=nod_flag+2; @@ -551,26 +558,25 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) goto err; buff_length=maria_getint(buff); - DBUG_DUMP("next",(byte*) buff,buff_length); + DBUG_DUMP("next",buff,buff_length); /* find keys to make a big key-page */ - bmove((byte*) next_keypos-key_reflength,(byte*) buff+2, - key_reflength); + bmove(next_keypos-key_reflength, buff+2, key_reflength); if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length) || !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key, leaf_buff+leaf_length,&length)) goto err; /* merge pages and put parting key from anc_buff between */ - prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key); + prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length, prev_key, prev_key, anc_key, &s_temp); length=buff_length-p_length; endpos=buff+length+leaf_length+t_length; /* buff will always be larger than before !*/ - bmove_upp((byte*) endpos, (byte*) buff+buff_length,length); - memcpy((byte*) buff, (byte*) leaf_buff,(size_t) leaf_length); + bmove_upp(endpos, buff+buff_length,length); + memcpy(buff, leaf_buff,(size_t) leaf_length); (*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp); buff_length=(uint) (endpos-buff); maria_putint(buff,buff_length,nod_flag); @@ -586,14 +592,14 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (buff_length <= keyinfo->block_length) { /* Keys in one page */ - memcpy((byte*) leaf_buff,(byte*) buff,(size_t) buff_length); + memcpy(leaf_buff,buff,(size_t) buff_length); if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS)) goto err; } else { /* Page is full */ endpos=anc_buff+anc_length; - DBUG_PRINT("test",("anc_buff: %lx endpos: %lx",anc_buff,endpos)); + DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx",anc_buff,endpos)); if (keypos != anc_buff+2+key_reflength && !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length)) goto err; @@ -601,22 +607,21 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, &key_length, &after_key))) goto err; length=(uint) (half_pos-buff); - memcpy((byte*) leaf_buff,(byte*) buff,(size_t) length); + memcpy(leaf_buff,buff,(size_t) length); maria_putint(leaf_buff,length,nod_flag); /* Correct new keypointer to leaf_page */ half_pos=after_key; _ma_kpointer(info,leaf_key+key_length,next_page); /* Save key in anc_buff */ - prev_key=(keypos == anc_buff+2+key_reflength ? (uchar*) 0 : anc_key), + prev_key=(keypos == anc_buff+2+key_reflength ? (byte*) 0 : anc_key), t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, - (keypos == endpos ? (uchar*) 0 : + (keypos == endpos ? (byte*) 0 : keypos), prev_key, prev_key, leaf_key, &s_temp); if (t_length >= 0) - bmove_upp((byte*) endpos+t_length,(byte*) endpos, - (uint) (endpos-keypos)); + bmove_upp(endpos+t_length, endpos, (uint) (endpos-keypos)); else bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length); (*keyinfo->store_key)(keyinfo,keypos,&s_temp); @@ -624,15 +629,15 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, /* Store key first in new page */ if (nod_flag) - bmove((byte*) buff+2,(byte*) half_pos-nod_flag,(size_t) nod_flag); + bmove(buff+2,half_pos-nod_flag,(size_t) nod_flag); if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)) goto err; - t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (uchar*) 0, - (uchar*) 0, (uchar *) 0, + t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, leaf_key, &s_temp); /* t_length will always be > 0 for a new page !*/ length=(uint) ((buff+maria_getint(buff))-half_pos); - bmove((byte*) buff+p_length+t_length,(byte*) half_pos,(size_t) length); + bmove(buff+p_length+t_length, half_pos, (size_t) length); (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp); maria_putint(buff,length+t_length+p_length,nod_flag); @@ -655,11 +660,10 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, goto err; buff_length=maria_getint(buff); endpos=buff+buff_length; - DBUG_DUMP("prev",(byte*) buff,buff_length); + DBUG_DUMP("prev",buff,buff_length); /* find keys to make a big key-page */ - bmove((byte*) next_keypos - key_reflength,(byte*) leaf_buff+2, - key_reflength); + bmove(next_keypos - key_reflength, leaf_buff+2, key_reflength); next_keypos=keypos; if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, anc_key)) @@ -668,17 +672,17 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, goto err; /* merge pages and put parting key from anc_buff between */ - prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key); + prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (leaf_length == p_length ? - (uchar*) 0 : leaf_buff+p_length), + (byte*) 0 : leaf_buff+p_length), prev_key, prev_key, anc_key, &s_temp); if (t_length >= 0) - bmove((byte*) endpos+t_length,(byte*) leaf_buff+p_length, - (size_t) (leaf_length-p_length)); + bmove(endpos+t_length, leaf_buff+p_length, + (size_t) (leaf_length-p_length)); else /* We gained space */ - bmove((byte*) endpos,(byte*) leaf_buff+((int) p_length-t_length), + bmove(endpos,leaf_buff+((int) p_length-t_length), (size_t) (leaf_length-p_length+t_length)); (*keyinfo->store_key)(keyinfo,endpos,&s_temp); @@ -711,18 +715,17 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, goto err; _ma_kpointer(info,leaf_key+key_length,leaf_page); /* Save key in anc_buff */ - DBUG_DUMP("anc_buff",(byte*) anc_buff,anc_length); - DBUG_DUMP("key_to_anc",(byte*) leaf_key,key_length); + DBUG_DUMP("anc_buff",anc_buff,anc_length); + DBUG_DUMP("key_to_anc",leaf_key,key_length); temp_pos=anc_buff+anc_length; t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, - keypos == temp_pos ? (uchar*) 0 + keypos == temp_pos ? (byte*) 0 : keypos, anc_pos, anc_pos, leaf_key,&s_temp); if (t_length > 0) - bmove_upp((byte*) temp_pos+t_length,(byte*) temp_pos, - (uint) (temp_pos-keypos)); + bmove_upp(temp_pos+t_length, temp_pos, (uint) (temp_pos-keypos)); else bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length); (*keyinfo->store_key)(keyinfo,keypos,&s_temp); @@ -730,15 +733,15 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, /* Store first key on new page */ if (nod_flag) - bmove((byte*) leaf_buff+2,(byte*) half_pos-nod_flag,(size_t) nod_flag); + bmove(leaf_buff+2,half_pos-nod_flag,(size_t) nod_flag); if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))) goto err; - DBUG_DUMP("key_to_leaf",(byte*) leaf_key,length); - t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0, - (uchar*) 0, (uchar*) 0, leaf_key, &s_temp); + DBUG_DUMP("key_to_leaf",leaf_key,length); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, leaf_key, &s_temp); length=(uint) ((buff+buff_length)-half_pos); DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length)); - bmove((byte*) leaf_buff+p_length+t_length,(byte*) half_pos, + bmove(leaf_buff+p_length+t_length,half_pos, (size_t) length); (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp); maria_putint(leaf_buff,length+t_length+p_length,nod_flag); @@ -763,15 +766,15 @@ err: */ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar *keypos, /* Where key starts */ - uchar *lastkey, /* key to be removed */ - uchar *page_end, /* End of page */ + byte *keypos, /* Where key starts */ + byte *lastkey, /* key to be removed */ + byte *page_end, /* End of page */ my_off_t *next_block) /* ptr to next block */ { int s_length; - uchar *start; + byte *start; DBUG_ENTER("remove_key"); - DBUG_PRINT("enter",("keypos: %lx page_end: %lx",keypos,page_end)); + DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx", keypos, page_end)); start=keypos; if (!(keyinfo->flag & @@ -795,7 +798,7 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, { if (keyinfo->flag & HA_BINARY_PACK_KEY) { - uchar *old_key=start; + byte *old_key=start; uint next_length,prev_length,prev_pack_length; get_key_length(next_length,keypos); get_key_pack_length(prev_length,prev_pack_length,old_key); @@ -882,7 +885,6 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, } } end: - bmove((byte*) start,(byte*) start+s_length, - (uint) (page_end-start-s_length)); + bmove(start, start+s_length, (uint) (page_end-start-s_length)); DBUG_RETURN((uint) s_length); } /* remove_key */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index b16d82ed9f7..3e1448e858a 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -43,8 +43,7 @@ int maria_delete_all_rows(MARIA_HA *info) info->state->empty=info->state->key_empty=0; info->state->checksum=0; - for (i=share->base.max_key_block_length/MARIA_MIN_KEY_BLOCK_LENGTH ; i-- ; ) - state->key_del[i]= HA_OFFSET_ERROR; + state->key_del= HA_OFFSET_ERROR; for (i=0 ; i < share->base.keys ; i++) state->key_root[i]= HA_OFFSET_ERROR; diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 047826408c3..a1d4ee58bf1 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -26,19 +26,16 @@ #include "maria_def.h" -/* Enough for comparing if number is zero */ -static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - -static int write_dynamic_record(MARIA_HA *info,const byte *record, - ulong reclength); +static my_bool write_dynamic_record(MARIA_HA *info,const byte *record, + ulong reclength); static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos, ulong *length); -static int update_dynamic_record(MARIA_HA *info,my_off_t filepos,byte *record, - ulong reclength); -static int delete_dynamic_record(MARIA_HA *info,my_off_t filepos, - uint second_read); -static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, - uint length); +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength); +static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos, + uint second_read); +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length); #ifdef THREAD /* Play it safe; We have a small stack when using threads */ @@ -224,19 +221,26 @@ uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, } -int _ma_write_dynamic_record(MARIA_HA *info, const byte *record) +my_bool _ma_write_dynamic_record(MARIA_HA *info, const byte *record) { - ulong reclength= _ma_rec_pack(info,info->rec_buff,record); - return (write_dynamic_record(info,info->rec_buff,reclength)); + ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + reclength)); } -int _ma_update_dynamic_record(MARIA_HA *info, my_off_t pos, const byte *record) +my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) { - uint length= _ma_rec_pack(info,info->rec_buff,record); - return (update_dynamic_record(info,pos,info->rec_buff,length)); + uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (update_dynamic_record(info, pos, + info->rec_buff + MARIA_REC_BUFF_OFFSET, + length)); } -int _ma_write_blob_record(MARIA_HA *info, const byte *record) + +my_bool _ma_write_blob_record(MARIA_HA *info, const byte *record) { byte *rec_buff; int error; @@ -246,31 +250,27 @@ int _ma_write_blob_record(MARIA_HA *info, const byte *record) MARIA_DYN_DELETE_BLOCK_HEADER+1); reclength= (info->s->base.pack_reclength + _ma_calc_total_blob_length(info,record)+ extra); -#ifdef NOT_USED /* We now support big rows */ - if (reclength > MARIA_DYN_MAX_ROW_LENGTH) - { - my_errno=HA_ERR_TO_BIG_ROW; - return -1; - } -#endif if (!(rec_buff=(byte*) my_alloca(reclength))) { my_errno=ENOMEM; - return(-1); + return(1); } - reclength2= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength2= _ma_rec_pack(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), record); DBUG_PRINT("info",("reclength: %lu reclength2: %lu", reclength, reclength2)); DBUG_ASSERT(reclength2 <= reclength); - error=write_dynamic_record(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), - reclength2); + error= write_dynamic_record(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength2); my_afree(rec_buff); - return(error); + return(error != 0); } -int _ma_update_blob_record(MARIA_HA *info, my_off_t pos, const byte *record) +my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) { byte *rec_buff; int error; @@ -284,13 +284,13 @@ int _ma_update_blob_record(MARIA_HA *info, my_off_t pos, const byte *record) if (reclength > MARIA_DYN_MAX_ROW_LENGTH) { my_errno=HA_ERR_TO_BIG_ROW; - return -1; + return 1; } #endif if (!(rec_buff=(byte*) my_alloca(reclength))) { my_errno=ENOMEM; - return(-1); + return(1); } reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), record); @@ -298,20 +298,20 @@ int _ma_update_blob_record(MARIA_HA *info, my_off_t pos, const byte *record) rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), reclength); my_afree(rec_buff); - return(error); + return(error != 0); } -int _ma_delete_dynamic_record(MARIA_HA *info) +my_bool _ma_delete_dynamic_record(MARIA_HA *info) { - return delete_dynamic_record(info,info->lastpos,0); + return delete_dynamic_record(info, info->cur_row.lastpos, 0); } /* Write record to data-file */ -static int write_dynamic_record(MARIA_HA *info, const byte *record, - ulong reclength) +static my_bool write_dynamic_record(MARIA_HA *info, const byte *record, + ulong reclength) { int flag; ulong length; @@ -443,8 +443,8 @@ static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info) (maria_rrnd() or maria_scan(), then ensure that we skip over this block when doing next maria_rrnd() or maria_scan(). */ - if (info->nextpos == block_info->filepos) - info->nextpos+=block_info->block_len; + if (info->cur_row.nextpos == block_info->filepos) + info->cur_row.nextpos+= block_info->block_len; DBUG_RETURN(0); } @@ -464,8 +464,9 @@ static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info) 1 error. In this case my_error is set. */ -static int update_backward_delete_link(MARIA_HA *info, my_off_t delete_block, - my_off_t filepos) +static my_bool update_backward_delete_link(MARIA_HA *info, + my_off_t delete_block, + MARIA_RECORD_POS filepos) { MARIA_BLOCK_INFO block_info; DBUG_ENTER("update_backward_delete_link"); @@ -490,11 +491,11 @@ static int update_backward_delete_link(MARIA_HA *info, my_off_t delete_block, DBUG_RETURN(0); } - /* Delete datarecord from database */ - /* info->rec_cache.seek_not_done is updated in cmp_record */ +/* Delete datarecord from database */ +/* info->rec_cache.seek_not_done is updated in cmp_record */ -static int delete_dynamic_record(MARIA_HA *info, my_off_t filepos, - uint second_read) +static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + uint second_read) { uint length,b_type; MARIA_BLOCK_INFO block_info,del_block; @@ -522,7 +523,8 @@ static int delete_dynamic_record(MARIA_HA *info, my_off_t filepos, del_block.second_read=0; remove_next_block=0; if (_ma_get_block_info(&del_block,info->dfile,filepos+length) & - BLOCK_DELETED && del_block.block_len+length < MARIA_DYN_MAX_BLOCK_LENGTH) + BLOCK_DELETED && del_block.block_len+length < + MARIA_DYN_MAX_BLOCK_LENGTH) { /* We can't remove this yet as this block may be the head block */ remove_next_block=1; @@ -719,8 +721,9 @@ int _ma_write_part_record(MARIA_HA *info, else { info->rec_cache.seek_not_done=1; - if (info->s->file_write(info,(byte*) *record-head_length,length+extra_length+ - del_length,filepos,info->s->write_flag)) + if (info->s->file_write(info,(byte*) *record-head_length, + length+extra_length+ + del_length,filepos,info->s->write_flag)) goto err; } memcpy(record_end,temp,(size_t) (extra_length+del_length)); @@ -745,8 +748,8 @@ err: /* update record from datafile */ -static int update_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *record, - ulong reclength) +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength) { int flag; uint error; @@ -784,8 +787,8 @@ static int update_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *record, { /* extend file */ DBUG_PRINT("info",("Extending file with %d bytes",tmp)); - if (info->nextpos == info->state->data_file_length) - info->nextpos+= tmp; + if (info->cur_row.nextpos == info->state->data_file_length) + info->cur_row.nextpos+= tmp; info->state->data_file_length+= tmp; info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK; length+=tmp; @@ -829,8 +832,8 @@ static int update_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *record, mi_int3store(del_block.header+1, rest_length); mi_sizestore(del_block.header+4,info->s->state.dellink); bfill(del_block.header+12,8,255); - if (info->s->file_write(info,(byte*) del_block.header,20, next_pos, - MYF(MY_NABP))) + if (info->s->file_write(info,(byte*) del_block.header, 20, + next_pos, MYF(MY_NABP))) DBUG_RETURN(1); info->s->state.dellink= next_pos; info->s->state.split++; @@ -877,9 +880,18 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) MARIA_BLOB *blob; DBUG_ENTER("_ma_rec_pack"); - flag=0 ; bit=1; - startpos=packpos=to; to+= info->s->base.pack_bits; blob=info->blobs; - rec=info->s->rec; + flag= 0; + bit= 1; + startpos= packpos=to; + to+= info->s->base.pack_bytes; + blob= info->blobs; + rec= info->s->rec; + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + from+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; + } for (i=info->s->base.fields ; i-- > 0; from+= length,rec++) { @@ -903,7 +915,7 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) } else if (type == FIELD_SKIP_ZERO) { - if (memcmp((byte*) from,zero_string,length) == 0) + if (memcmp((byte*) from, maria_zero_string, length) == 0) flag|=bit; else { @@ -981,7 +993,7 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) if (bit != 1) *packpos= (char) (uchar) flag; if (info->s->calc_checksum) - *to++=(char) info->checksum; + *to++= (byte) info->cur_row.checksum; DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos))); DBUG_RETURN((uint) (to-startpos)); } /* _ma_rec_pack */ @@ -989,7 +1001,7 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) /* - Check if a record was correctly packed. Used only by mariachk + Check if a record was correctly packed. Used only by maria_chk Returns 0 if record is ok. */ @@ -1002,9 +1014,11 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, reg3 MARIA_COLUMNDEF *rec; DBUG_ENTER("_ma_rec_check"); - packpos=rec_buff; to= rec_buff+info->s->base.pack_bits; + packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes; rec=info->s->rec; flag= *packpos; bit=1; + record+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; for (i=info->s->base.fields ; i-- > 0; record+= length, rec++) { @@ -1022,7 +1036,7 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, } else if (type == FIELD_SKIP_ZERO) { - if (memcmp((byte*) record,zero_string,length) == 0) + if (memcmp((byte*) record, maria_zero_string, length) == 0) { if (!(flag & bit)) goto err; @@ -1098,7 +1112,7 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1)))) goto err; - if (with_checksum && ((uchar) info->checksum != (uchar) *to)) + if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) { DBUG_PRINT("error",("wrong checksum for row")); goto err; @@ -1129,8 +1143,16 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, flag= (uchar) *from; bit=1; packpos=from; if (found_length < info->s->base.min_pack_length) goto err; - from+= info->s->base.pack_bits; - min_pack_length=info->s->base.min_pack_length - info->s->base.pack_bits; + from+= info->s->base.pack_bytes; + min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes; + + if ((length= info->s->base.null_bytes)) + { + memcpy(to, from, length); + from+= length; + to+= length; + min_pack_length-= length; + } for (rec=info->s->rec , end_field=rec+info->s->base.fields ; rec < end_field ; to+= rec_length, rec++) @@ -1234,13 +1256,13 @@ ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, } } if (info->s->calc_checksum) - from++; + info->cur_row.checksum= (uint) (uchar) *from++; if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1)))) DBUG_RETURN(found_length); err: my_errno= HA_ERR_WRONG_IN_RECORD; - DBUG_PRINT("error",("to_end: %lx -> %lx from_end: %lx -> %lx", + DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx", to,to_end,from,from_end)); DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); DBUG_RETURN(MY_FILE_ERROR); @@ -1305,16 +1327,17 @@ void _ma_store_blob_length(byte *pos,uint pack_length,uint length) /* Read record from datafile */ - /* Returns 0 if ok, -1 if error */ + /* Returns 0 if ok, 1 if error */ -int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) +int _ma_read_dynamic_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) { int flag; uint b_type,left_length; byte *to; MARIA_BLOCK_INFO block_info; File file; - DBUG_ENTER("maria_read_dynamic_record"); + DBUG_ENTER("_ma_read_dynamic_record"); if (filepos != HA_OFFSET_ERROR) { @@ -1349,17 +1372,18 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) goto panic; if (info->s->base.blobs) { - if (!(to=_ma_alloc_rec_buff(info, block_info.rec_len, - &info->rec_buff))) + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) goto err; } - else - to= info->rec_buff; + to= info->rec_buff; left_length=block_info.rec_len; } if (left_length < block_info.data_len || ! block_info.data_len) goto panic; /* Wrong linked record */ - if (info->s->file_read(info,(byte*) to,block_info.data_len,block_info.filepos, + if (info->s->file_read(info,(byte*) to,block_info.data_len, + block_info.filepos, MYF(MY_NABP))) goto panic; left_length-=block_info.data_len; @@ -1369,55 +1393,61 @@ int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, byte *buf) info->update|= HA_STATE_AKTIV; /* We have a aktive record */ fast_ma_writeinfo(info); DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != - MY_FILE_ERROR ? 0 : -1); + MY_FILE_ERROR ? 0 : 1); } fast_ma_writeinfo(info); - DBUG_RETURN(-1); /* Wrong data to read */ + DBUG_RETURN(1); /* Wrong data to read */ panic: my_errno=HA_ERR_WRONG_IN_RECORD; err: VOID(_ma_writeinfo(info,0)); - DBUG_RETURN(-1); + DBUG_RETURN(1); } /* compare unique constraint between stored rows */ -int _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, - const byte *record, my_off_t pos) +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) { - byte *rec_buff,*old_record; - int error; + byte *old_rec_buff,*old_record; + my_off_t old_rec_buff_size; + my_bool error; DBUG_ENTER("_ma_cmp_dynamic_unique"); if (!(old_record=my_alloca(info->s->base.reclength))) DBUG_RETURN(1); /* Don't let the compare destroy blobs that may be in use */ - rec_buff=info->rec_buff; + old_rec_buff= info->rec_buff; + old_rec_buff_size= info->rec_buff_size; + if (info->s->base.blobs) - info->rec_buff=0; - error= _ma_read_dynamic_record(info,pos,old_record); + info->rec_buff= 0; + error= _ma_read_dynamic_record(info, old_record, pos) != 0; if (!error) - error=_ma_unique_comp(def, record, old_record, def->null_are_equal); + error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0; if (info->s->base.blobs) { - my_free(_ma_get_rec_buff_ptr(info, info->rec_buff), MYF(MY_ALLOW_ZERO_PTR)); - info->rec_buff=rec_buff; + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= old_rec_buff; + info->rec_buff_size= old_rec_buff_size; } my_afree(old_record); DBUG_RETURN(error); } - /* Compare of record one disk with packed record in memory */ + /* Compare of record on disk with packed record in memory */ -int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) +my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, + register const byte *record) { - uint flag,reclength,b_type; + uint flag, reclength, b_type,cmp_length; my_off_t filepos; byte *buffer; MARIA_BLOCK_INFO block_info; + my_bool error= 1; DBUG_ENTER("_ma_cmp_dynamic_record"); /* We are going to do changes; dont let anybody disturb */ @@ -1427,7 +1457,7 @@ int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) { info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK); if (flush_io_cache(&info->rec_cache)) - DBUG_RETURN(-1); + DBUG_RETURN(1); } info->rec_cache.seek_not_done=1; @@ -1440,12 +1470,12 @@ int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) { if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ _ma_calc_total_blob_length(info,record)))) - DBUG_RETURN(-1); + DBUG_RETURN(1); } reclength= _ma_rec_pack(info,buffer,record); record= buffer; - filepos=info->lastpos; + filepos= info->cur_row.lastpos; flag=block_info.second_read=0; block_info.next_filepos=filepos; while (reclength > 0) @@ -1472,9 +1502,13 @@ int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) my_errno=HA_ERR_WRONG_IN_RECORD; goto err; } - reclength-=block_info.data_len; + reclength-= block_info.data_len; + cmp_length= block_info.data_len; + if (!reclength && info->s->calc_checksum) + cmp_length--; /* 'record' may not contain checksum */ + if (_ma_cmp_buffer(info->dfile,record,block_info.filepos, - block_info.data_len)) + cmp_length)) { my_errno=HA_ERR_RECORD_CHANGED; goto err; @@ -1484,17 +1518,19 @@ int _ma_cmp_dynamic_record(register MARIA_HA *info, register const byte *record) } } my_errno=0; + error= 0; err: if (buffer != info->rec_buff) my_afree((gptr) buffer); - DBUG_RETURN(my_errno); + DBUG_PRINT("exit", ("result: %d", error)); + DBUG_RETURN(error); } /* Compare file to buffert */ -static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, - uint length) +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length) { uint next_length; char temp_buff[IO_SIZE*2]; @@ -1514,14 +1550,15 @@ static int _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, } if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP))) goto err; - DBUG_RETURN(memcmp((byte*) buff,temp_buff,length)); + DBUG_RETURN(memcmp((byte*) buff,temp_buff,length) != 0); err: DBUG_RETURN(1); } -int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, - register my_off_t filepos, +int _ma_read_rnd_dynamic_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, my_bool skip_deleted_blocks) { int flag,info_read,save_errno; @@ -1594,9 +1631,9 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, } if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) { - my_errno=HA_ERR_RECORD_DELETED; - info->lastpos=block_info.filepos; - info->nextpos=block_info.filepos+block_info.block_len; + my_errno= HA_ERR_RECORD_DELETED; + info->cur_row.lastpos= block_info.filepos; + info->cur_row.nextpos= block_info.filepos+block_info.block_len; } goto err; } @@ -1604,15 +1641,15 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, { if (block_info.rec_len > (uint) share->base.max_pack_length) goto panic; - info->lastpos=filepos; + info->cur_row.lastpos= filepos; if (share->base.blobs) { - if (!(to= _ma_alloc_rec_buff(info, block_info.rec_len, - &info->rec_buff))) + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) goto err; } - else - to= info->rec_buff; + to= info->rec_buff; left_len=block_info.rec_len; } if (left_len < block_info.data_len) @@ -1658,7 +1695,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, byte *buf, } if (flag++ == 0) { - info->nextpos=block_info.filepos+block_info.block_len; + info->cur_row.nextpos= block_info.filepos+block_info.block_len; skip_deleted_blocks=0; } left_len-=block_info.data_len; diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index d600fedb99b..a75b8084214 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -19,7 +19,8 @@ #include #endif -static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function); +static void maria_extra_keyflag(MARIA_HA *info, + enum ha_extra_function function); /* @@ -38,7 +39,8 @@ static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function) # error */ -int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg) +int maria_extra(MARIA_HA *info, enum ha_extra_function function, + void *extra_arg) { int error=0; ulong cache_size; @@ -49,7 +51,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg switch (function) { case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ info->lastinx= 0; /* Use first index as def */ - info->last_search_keypage=info->lastpos= HA_OFFSET_ERROR; + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed=1; /* Next/prev gives first/last */ if (info->opt_flag & READ_CACHE_USED) @@ -115,7 +117,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg case HA_EXTRA_REINIT_CACHE: if (info->opt_flag & READ_CACHE_USED) { - reinit_io_cache(&info->rec_cache,READ_CACHE,info->nextpos, + reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos, (pbool) (info->lock_type != F_UNLCK), (pbool) test(info->update & HA_STATE_ROW_CHANGED)); info->update&= ~HA_STATE_ROW_CHANGED; @@ -185,7 +187,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg (byte*) info->lastkey,info->lastkey_length); info->save_update= info->update; info->save_lastinx= info->lastinx; - info->save_lastpos= info->lastpos; + info->save_lastpos= info->cur_row.lastpos; info->save_lastkey_length=info->lastkey_length; if (function == HA_EXTRA_REMEMBER_POS) break; @@ -203,7 +205,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg info->save_lastkey_length); info->update= info->save_update | HA_STATE_WRITTEN; info->lastinx= info->save_lastinx; - info->lastpos= info->save_lastpos; + info->cur_row.lastpos= info->save_lastpos; info->lastkey_length=info->save_lastkey_length; } info->read_record= share->read_record; @@ -327,8 +329,13 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, void *extra_arg maria_mark_crashed(info); /* Fatal error found */ } } - if (share->base.blobs) - _ma_alloc_rec_buff(info, -1, &info->rec_buff); + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } break; case HA_EXTRA_NORMAL: /* Theese isn't in use */ info->quick_mode=0; @@ -419,8 +426,13 @@ int maria_reset(MARIA_HA *info) info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); error= end_io_cache(&info->rec_cache); } - if (share->base.blobs) - _ma_alloc_rec_buff(info, -1, &info->rec_buff); + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } #if defined(HAVE_MMAP) && defined(HAVE_MADVISE) if (info->opt_flag & MEMMAP_USED) madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); @@ -428,7 +440,7 @@ int maria_reset(MARIA_HA *info) info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); info->quick_mode=0; info->lastinx= 0; /* Use first index as def */ - info->last_search_keypage= info->lastpos= HA_OFFSET_ERROR; + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed= 1; info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c index 83901cb5e47..40ac88bfcbf 100644 --- a/storage/maria/ma_ft_boolean_search.c +++ b/storage/maria/ma_ft_boolean_search.c @@ -332,7 +332,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) my_bool can_go_down; MARIA_HA *info=ftb->info; uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength; - byte *lastkey_buf=ftbw->word+ftbw->off; + byte *lastkey_buf= ftbw->word+ftbw->off; LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) @@ -343,7 +343,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) ftbw->key_root=info->s->state.key_root[ftb->keynr]; ftbw->keyinfo=info->s->keyinfo+ftb->keynr; - r= _ma_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len, + r= _ma_search(info, ftbw->keyinfo, ftbw->word, ftbw->len, SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root); } else @@ -352,10 +352,10 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) if (ftbw->docid[0] < *ftbw->max_docid) { sflag|= SEARCH_SAME; - _ma_dpointer(info, (uchar *)(ftbw->word + ftbw->len + HA_FT_WLEN), + _ma_dpointer(info, (ftbw->word + ftbw->len + HA_FT_WLEN), *ftbw->max_docid); } - r= _ma_search(info, ftbw->keyinfo, (uchar*) lastkey_buf, + r= _ma_search(info, ftbw->keyinfo, lastkey_buf, USE_WHOLE_KEY, sflag, ftbw->key_root); } @@ -369,7 +369,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) off=info->lastkey_length-extra; subkeys=ft_sintXkorr(info->lastkey+off); } - if (subkeys<0 || info->lastpos < info->state->data_file_length) + if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length) break; r= _ma_search_next(info, ftbw->keyinfo, info->lastkey, info->lastkey_length, @@ -379,11 +379,11 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) if (!r && !ftbw->off) { r= ha_compare_text(ftb->charset, - info->lastkey+1, + (uchar*) info->lastkey+1, info->lastkey_length-extra-1, - (uchar*) ftbw->word+1, + (uchar*) ftbw->word+1, ftbw->len-1, - (my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0); + (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0); } if (r) /* not found */ @@ -405,7 +405,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) } /* going up to the first-level tree to continue search there */ - _ma_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root); + _ma_dpointer(info, (lastkey_buf+HA_FT_WLEN), ftbw->key_root); ftbw->key_root=info->s->state.key_root[ftb->keynr]; ftbw->keyinfo=info->s->keyinfo+ftb->keynr; ftbw->off=0; @@ -425,15 +425,15 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) TODO here: subkey-based optimization */ ftbw->off=off; - ftbw->key_root=info->lastpos; + ftbw->key_root= info->cur_row.lastpos; ftbw->keyinfo=& info->s->ft2_keyinfo; r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root); DBUG_ASSERT(r==0); /* found something */ memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length); } - ftbw->docid[0]=info->lastpos; + ftbw->docid[0]= info->cur_row.lastpos; if (ftbw->flags & FTB_FLAG_YES) - *ftbw->max_docid= info->lastpos; + *ftbw->max_docid= info->cur_row.lastpos; return 0; } @@ -796,11 +796,11 @@ int maria_ft_boolean_read_next(FT_INFO *ftb, char *record) /* but it managed already to get past this line once */ continue; - info->lastpos=curdoc; + info->cur_row.lastpos= curdoc; /* Clear all states, except that the table was updated */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - if (!(*info->read_record)(info,curdoc,record)) + if (!(*info->read_record)(info, record, curdoc)) { info->update|= HA_STATE_AKTIV; /* Record is read */ if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0) @@ -851,9 +851,9 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, (uchar*)ftbw->word + 1,ftbw->len - 1, (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) break; - if (ftbw->docid[1] == ftb->info->lastpos) + if (ftbw->docid[1] == ftb->info->cur_row.lastpos) continue; - ftbw->docid[1]= ftb->info->lastpos; + ftbw->docid[1]= ftb->info->cur_row.lastpos; _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi); } return(0); @@ -877,7 +877,7 @@ float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) { FTB_EXPR *ftbe; FT_SEG_ITERATOR ftsi, ftsi2; - my_off_t docid=ftb->info->lastpos; + MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos; MY_FTB_FIND_PARAM ftb_param; MYSQL_FTPARSER_PARAM *param; struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c index 993857aecbb..4c922516455 100644 --- a/storage/maria/ma_ft_nlq_search.c +++ b/storage/maria/ma_ft_nlq_search.c @@ -69,8 +69,8 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) FT_SUPERDOC sdoc, *sptr; TREE_ELEMENT *selem; double gweight=1; - MARIA_HA *info=aio->info; - uchar *keybuff=aio->keybuff; + MARIA_HA *info= aio->info; + byte *keybuff= (byte*) aio->keybuff; MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr; my_off_t key_root=info->s->state.key_root[aio->keynr]; uint extra=HA_FT_WLEN+info->s->base.rec_reflength; @@ -92,7 +92,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ; !r && (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && - info->lastpos >= info->state->data_file_length ; + info->cur_row.lastpos >= info->state->data_file_length ; r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, SEARCH_BIGGER, key_root)) ; @@ -104,8 +104,9 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) { if (keylen && - ha_compare_text(aio->charset,info->lastkey+1, - info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) + ha_compare_text(aio->charset, + (uchar*) info->lastkey+1, info->lastkey_length-extra-1, + (uchar*) keybuff+1, keylen-1, 0, 0)) break; if (subkeys<0) @@ -118,7 +119,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) */ keybuff+=keylen; keyinfo=& info->s->ft2_keyinfo; - key_root=info->lastpos; + key_root= info->cur_row.lastpos; keylen=0; r= _ma_search_first(info, keyinfo, key_root); goto do_skip; @@ -132,7 +133,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) if (tmp_weight==0) DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ - sdoc.doc.dpos=info->lastpos; + sdoc.doc.dpos= info->cur_row.lastpos; /* saving document matched into dtree */ if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg))) @@ -162,7 +163,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) SEARCH_BIGGER, key_root); do_skip: while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && - !r && info->lastpos >= info->state->data_file_length) + !r && info->cur_row.lastpos >= info->state->data_file_length) r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, SEARCH_BIGGER, key_root); @@ -209,22 +210,22 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, ALL_IN_ONE aio; FT_DOC *dptr; FT_INFO *dlist=NULL; - my_off_t saved_lastpos=info->lastpos; + MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos; struct st_mysql_ftparser *parser; MYSQL_FTPARSER_PARAM *ftparser_param; DBUG_ENTER("maria_ft_init_nlq_search"); -/* black magic ON */ + /* black magic ON */ if ((int) (keynr = _ma_check_index(info,keynr)) < 0) DBUG_RETURN(NULL); if (_ma_readinfo(info,F_RDLCK,1)) DBUG_RETURN(NULL); -/* black magic OFF */ + /* black magic OFF */ aio.info=info; aio.keynr=keynr; aio.charset=info->s->keyinfo[keynr].seg->charset; - aio.keybuff=info->lastkey+info->s->base.max_key_length; + aio.keybuff= (uchar*) info->lastkey+info->s->base.max_key_length; parser= info->s->keyinfo[keynr].parser; if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0))) goto err; @@ -254,7 +255,7 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, while (best.elements) { my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos; - if (!(*info->read_record)(info,docid,record)) + if (!(*info->read_record)(info, record, docid)) { info->update|= HA_STATE_AKTIV; ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY; @@ -296,7 +297,7 @@ FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, err: delete_tree(&aio.dtree); delete_tree(&wtree); - info->lastpos=saved_lastpos; + info->cur_row.lastpos= saved_lastpos; DBUG_RETURN(dlist); } @@ -313,8 +314,8 @@ int maria_ft_nlq_read_next(FT_INFO *handler, char *record) info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - info->lastpos=handler->doc[handler->curdoc].dpos; - if (!(*info->read_record)(info,info->lastpos,record)) + info->cur_row.lastpos= handler->doc[handler->curdoc].dpos; + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) { info->update|= HA_STATE_AKTIV; /* Record is read */ return 0; @@ -329,7 +330,7 @@ float maria_ft_nlq_find_relevance(FT_INFO *handler, { int a,b,c; FT_DOC *docs=handler->doc; - my_off_t docid=handler->info->lastpos; + MARIA_RECORD_POS docid= handler->info->cur_row.lastpos; if (docid == HA_POS_ERROR) return -5.0; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c index 965f9afc91d..25f4d5a67a0 100644 --- a/storage/maria/ma_ft_update.c +++ b/storage/maria/ma_ft_update.c @@ -141,7 +141,7 @@ static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf, for (; wlist->pos; wlist++) { key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); - if (_ma_ck_write(info,keynr,(uchar*) keybuf,key_length)) + if (_ma_ck_write(info, keynr, keybuf, key_length)) DBUG_RETURN(1); } DBUG_RETURN(0); @@ -156,7 +156,7 @@ static int _ma_ft_erase(MARIA_HA *info, uint keynr, byte *keybuf, for (; wlist->pos; wlist++) { key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); - if (_ma_ck_delete(info,keynr,(uchar*) keybuf,key_length)) + if (_ma_ck_delete(info, keynr, keybuf, key_length)) err=1; } DBUG_RETURN(err); @@ -219,13 +219,13 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, if (cmp < 0 || cmp2) { key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos); - if ((error= _ma_ck_delete(info,keynr,(uchar*) keybuf,key_length))) + if ((error= _ma_ck_delete(info,keynr, keybuf,key_length))) goto err; } if (cmp > 0 || cmp2) { - key_length= _ma_ft_make_key(info,keynr,keybuf,new_word,pos); - if ((error= _ma_ck_write(info,keynr,(uchar*) keybuf,key_length))) + key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos); + if ((error= _ma_ck_write(info, keynr, keybuf,key_length))) goto err; } if (cmp<=0) old_word++; @@ -277,8 +277,9 @@ int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, DBUG_RETURN(error); } + uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, - my_off_t filepos) + my_off_t filepos) { byte buf[HA_FT_MAXBYTELEN+16]; DBUG_ENTER("_ma_ft_make_key"); @@ -294,7 +295,7 @@ uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, int2store(buf+HA_FT_WLEN,wptr->len); memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); - DBUG_RETURN(_ma_make_key(info,keynr,(uchar*) keybuf,buf,filepos)); + DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos)); } @@ -302,12 +303,12 @@ uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, convert key value to ft2 */ -uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key) +uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, byte *key) { my_off_t root; DYNAMIC_ARRAY *da=info->ft1_to_ft2; MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo; - uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end; + byte *key_ptr= (byte*) dynamic_array_ptr(da, 0), *end; uint length, key_length; DBUG_ENTER("_ma_ft_convert_to_ft2"); @@ -329,13 +330,13 @@ uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key) /* creating pageful of keys */ maria_putint(info->buff,length+2,0); memcpy(info->buff+2, key_ptr, length); - info->buff_used=info->page_changed=1; /* info->buff is used */ + info->keybuff_used=info->page_changed=1; /* info->buff is used */ if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff)) DBUG_RETURN(-1); /* inserting the rest of key values */ - end= (uchar*) dynamic_array_ptr(da, da->elements); + end= (byte*) dynamic_array_ptr(da, da->elements); for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) DBUG_RETURN(-1); diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h index 946a5628175..cf21471b316 100644 --- a/storage/maria/ma_fulltext.h +++ b/storage/maria/ma_fulltext.h @@ -25,4 +25,4 @@ int _ma_ft_cmp(MARIA_HA *, uint, const byte *, const byte *); int _ma_ft_add(MARIA_HA *, uint, byte *, const byte *, my_off_t); int _ma_ft_del(MARIA_HA *, uint, byte *, const byte *, my_off_t); -uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, uchar *); +uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, byte *); diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index b22ffa41833..397cd2465d4 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -23,9 +23,9 @@ /* Get position to last record */ -my_off_t maria_position(MARIA_HA *info) +MARIA_RECORD_POS maria_position(MARIA_HA *info) { - return info->lastpos; + return info->cur_row.lastpos; } @@ -38,7 +38,7 @@ int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) MARIA_SHARE *share=info->s; DBUG_ENTER("maria_status"); - x->recpos = info->lastpos; + x->recpos= info->cur_row.lastpos; if (flag == HA_STATUS_POS) DBUG_RETURN(0); /* Compatible with ISAM */ if (!(flag & HA_STATUS_NO_LOCK)) @@ -64,8 +64,8 @@ int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) } if (flag & HA_STATUS_ERRKEY) { - x->errkey = info->errkey; - x->dupp_key_pos= info->dupp_key_pos; + x->errkey= info->errkey; + x->dup_key_pos= info->dup_key_pos; } if (flag & HA_STATUS_CONST) { @@ -121,13 +121,17 @@ int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) void _ma_report_error(int errcode, const char *file_name) { - size_t lgt; + uint length; DBUG_ENTER("_ma_report_error"); DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); - if ((lgt= strlen(file_name)) > 64) - file_name+= lgt - 64; + if ((length= strlen(file_name)) > 64) + { + uint dir_length= dirname_length(file_name); + file_name+= dir_length; + if ((length-= dir_length) > 64) + file_name+= length - 64; + } my_error(errcode, MYF(ME_NOREFRESH), file_name); DBUG_VOID_RETURN; } - diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c index 318bbe341e4..b1d119732e2 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -18,6 +18,7 @@ #include "maria_def.h" #include +#include "ma_blockrec.h" static int maria_inited= 0; pthread_mutex_t THR_LOCK_maria; @@ -42,6 +43,7 @@ int maria_init(void) { maria_inited= 1; pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); + _ma_init_block_record_data(); } return 0; } diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index ecd51f5dc92..d366c9461d6 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -49,11 +49,11 @@ static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record); Length of key */ -uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, - const byte *record, my_off_t filepos) +uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos) { - byte *pos,*end; - uchar *start; + const byte *pos,*end; + byte *start; reg1 HA_KEYSEG *keyseg; my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; DBUG_ENTER("_ma_make_key"); @@ -64,7 +64,7 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, TODO: nulls processing */ #ifdef HAVE_SPATIAL - DBUG_RETURN(sp_make_key(info,keynr,key,record,filepos)); + DBUG_RETURN(sp_make_key(info,keynr, key,record,filepos)); #else DBUG_ASSERT(0); /* maria_open should check that this never happens*/ #endif @@ -91,17 +91,17 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : length); - pos= (byte*) record+keyseg->start; + pos= record+keyseg->start; if (type == HA_KEYTYPE_BIT) { if (keyseg->bit_length) { uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); - *key++= bits; + *key++= (char) bits; length--; } - memcpy((byte*) key, pos, length); + memcpy(key, pos, length); key+= length; continue; } @@ -121,7 +121,7 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, length=(uint) (end-pos); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); - memcpy((byte*) key,(byte*) pos,(size_t) char_length); + memcpy(key, pos, (size_t) char_length); key+=char_length; continue; } @@ -134,18 +134,18 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, set_if_smaller(length,tmp_length); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); - memcpy((byte*) key,(byte*) pos,(size_t) char_length); + memcpy(key,pos,(size_t) char_length); key+= char_length; continue; } else if (keyseg->flag & HA_BLOB_PART) { uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); - memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); + memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*)); set_if_smaller(length,tmp_length); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); - memcpy((byte*) key,(byte*) pos,(size_t) char_length); + memcpy(key,pos,(size_t) char_length); key+= char_length; continue; } @@ -184,14 +184,14 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, continue; } FIX_LENGTH(cs, pos, length, char_length); - memcpy((byte*) key, pos, char_length); + memcpy(key, pos, char_length); if (length > char_length) cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; } _ma_dpointer(info,key,filepos); DBUG_PRINT("exit",("keynr: %d",keynr)); - DBUG_DUMP("key",(byte*) start,(uint) (key-start)+keyseg->length); + DBUG_DUMP("key",start,(uint) (key-start)+keyseg->length); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start, (uint) (key-start));); @@ -217,10 +217,10 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, uchar *key, last_use_keyseg Store pointer to the keyseg after the last used one */ -uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, - uint k_length, HA_KEYSEG **last_used_keyseg) +uint _ma_pack_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint k_length, HA_KEYSEG **last_used_keyseg) { - uchar *start_key=key; + byte *start_key=key; HA_KEYSEG *keyseg; my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; DBUG_ENTER("_ma_pack_key"); @@ -232,7 +232,7 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; uint length=min((uint) keyseg->length,(uint) k_length); uint char_length; - uchar *pos; + const byte *pos; CHARSET_INFO *cs=keyseg->charset; if (keyseg->null_bit) @@ -249,11 +249,12 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, continue; /* Found NULL */ } } - char_length= (!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : length; - pos=old; + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : + length); + pos= old; if (keyseg->flag & HA_SPACE_PACK) { - uchar *end=pos+length; + const byte *end= pos + length; if (type != HA_KEYTYPE_NUM) { while (end > pos && end[-1] == ' ') @@ -268,7 +269,7 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, length=(uint) (end-pos); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); - memcpy((byte*) key,pos,(size_t) char_length); + memcpy(key,pos,(size_t) char_length); key+= char_length; continue; } @@ -282,7 +283,7 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); old+=2; /* Skip length */ - memcpy((byte*) key, pos,(size_t) char_length); + memcpy(key, pos,(size_t) char_length); key+= char_length; continue; } @@ -297,7 +298,7 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, continue; } FIX_LENGTH(cs, pos, length, char_length); - memcpy((byte*) key, pos, char_length); + memcpy(key, pos, char_length); if (length > char_length) cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; @@ -321,7 +322,7 @@ uint _ma_pack_key(register MARIA_HA *info, uint keynr, uchar *key, uchar *old, length+= keyseg->length; keyseg++; } while (keyseg->type); - bzero((byte*) key,length); + bzero(key,length); key+=length; } #endif @@ -358,8 +359,8 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, byte *blob_ptr; DBUG_ENTER("_ma_put_key_in_record"); - blob_ptr= (byte*) info->lastkey2; /* Place to put blob parts */ - key=(byte*) info->lastkey; /* KEy that was read */ + blob_ptr= info->lastkey2; /* Place to put blob parts */ + key=info->lastkey; /* KEy that was read */ key_end=key+info->lastkey_length; for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) { @@ -378,7 +379,7 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, if (keyseg->bit_length) { - uchar bits= *key++; + byte bits= *key++; set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); length--; @@ -388,7 +389,7 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); } - memcpy(record + keyseg->start, (byte*) key, length); + memcpy(record + keyseg->start, key, length); key+= length; continue; } @@ -429,7 +430,7 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, else int2store(record+keyseg->start, length); /* And key data */ - memcpy(record+keyseg->start + keyseg->bit_start, (byte*) key, length); + memcpy(record+keyseg->start + keyseg->bit_start, key, length); key+= length; } else if (keyseg->flag & HA_BLOB_PART) @@ -472,8 +473,7 @@ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, if (key+keyseg->length > key_end) goto err; #endif - memcpy(record+keyseg->start,(byte*) key, - (size_t) keyseg->length); + memcpy(record+keyseg->start, key, (size_t) keyseg->length); key+= keyseg->length; } } @@ -486,7 +486,7 @@ err: /* Here when key reads are used */ -int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, byte *buf) +int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) { fast_ma_writeinfo(info); if (filepos != HA_OFFSET_ERROR) @@ -526,7 +526,7 @@ ulonglong ma_retrieve_auto_increment(MARIA_HA *info,const byte *record) ulonglong value= 0; /* Store unsigned values here */ longlong s_value= 0; /* Store signed values here */ HA_KEYSEG *keyseg= info->s->keyinfo[info->s->base.auto_key-1].seg; - const uchar *key= (uchar*) record + keyseg->start; + const byte *key= record + keyseg->start; switch (keyseg->type) { case HA_KEYTYPE_INT8: diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c index 837b0fbac66..a2ea4349338 100644 --- a/storage/maria/ma_keycache.c +++ b/storage/maria/ma_keycache.c @@ -54,8 +54,9 @@ int maria_assign_to_key_cache(MARIA_HA *info, int error= 0; MARIA_SHARE* share= info->s; DBUG_ENTER("maria_assign_to_key_cache"); - DBUG_PRINT("enter",("old_key_cache_handle: %lx new_key_cache_handle: %lx", - share->key_cache, key_cache)); + DBUG_PRINT("enter", + ("old_key_cache_handle: 0x%lx new_key_cache_handle: 0x%lx", + share->key_cache, key_cache)); /* Skip operation if we didn't change key cache. This can happen if we diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index adb4b03bebe..62746eba875 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -60,13 +60,25 @@ int maria_lock_database(MARIA_HA *info, int lock_type) else count= --share->w_locks; --share->tot_locks; - if (info->lock_type == F_WRLCK && !share->w_locks && - !share->delay_key_write && flush_key_blocks(share->key_cache, - share->kfile,FLUSH_KEEP)) + if (info->lock_type == F_WRLCK && !share->w_locks) { - error=my_errno; - maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); /* Mark that table must be checked */ + if (!share->delay_key_write && flush_key_blocks(share->key_cache, + share->kfile, + FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } + if (share->data_file_type == BLOCK_RECORD && + flush_key_blocks(share->key_cache, info->dfile, FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } } if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) { @@ -84,16 +96,17 @@ int maria_lock_database(MARIA_HA *info, int lock_type) if (share->changed && !share->w_locks) { #ifdef HAVE_MMAP - if ((info->s->mmaped_length != info->s->state.state.data_file_length) && - (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) - { - if (info->s->concurrent_insert) - rw_wrlock(&info->s->mmap_lock); - _ma_remap_file(info, info->s->state.state.data_file_length); - info->s->nonmmaped_inserts= 0; - if (info->s->concurrent_insert) - rw_unlock(&info->s->mmap_lock); - } + if ((info->s->mmaped_length != + info->s->state.state.data_file_length) && + (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) + { + if (info->s->concurrent_insert) + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, info->s->state.state.data_file_length); + info->s->nonmmaped_inserts= 0; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + } #endif share->state.process= share->last_process=share->this_process; share->state.unique= info->last_unique= info->this_unique; @@ -350,7 +363,7 @@ int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer) int _ma_writeinfo(register MARIA_HA *info, uint operation) { int error,olderror; - MARIA_SHARE *share=info->s; + MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_writeinfo"); DBUG_PRINT("info",("operation: %u tot_locks: %u", operation, share->tot_locks)); @@ -358,13 +371,13 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) error=0; if (share->tot_locks == 0) { - olderror=my_errno; /* Remember last error */ if (operation) { /* Two threads can't be here */ + olderror= my_errno; /* Remember last error */ share->state.process= share->last_process= share->this_process; share->state.unique= info->last_unique= info->this_unique; share->state.update_count= info->last_loop= ++info->this_loop; - if ((error=_ma_state_info_write(share->kfile, &share->state, 1))) + if ((error= _ma_state_info_write(share->kfile, &share->state, 1))) olderror=my_errno; #ifdef __WIN__ if (maria_flush) @@ -373,8 +386,8 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation) _commit(info->dfile); } #endif + my_errno=olderror; } - my_errno=olderror; } else if (operation) share->changed= 1; /* Mark keyfile changed */ diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 38e71a44f8b..cbc781c589e 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -19,6 +19,7 @@ #include "ma_fulltext.h" #include "ma_sp_defs.h" #include "ma_rt_index.h" +#include "ma_blockrec.h" #include #if defined(MSDOS) || defined(__WIN__) @@ -33,6 +34,12 @@ #endif static void setup_key_functions(MARIA_KEYDEF *keyinfo); +static my_bool maria_scan_init_dummy(MARIA_HA *info); +static void maria_scan_end_dummy(MARIA_HA *info); +static my_bool maria_once_init_dummy(MARIA_SHARE *, File); +static my_bool maria_once_end_dummy(MARIA_SHARE *); +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base); + #define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \ pos+=size;} @@ -76,7 +83,7 @@ MARIA_HA *_ma_test_if_reopen(char *filename) MARIA_HA *maria_open(const char *name, int mode, uint open_flags) { int kfile,open_mode,save_errno,have_rtree=0; - uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys, + uint i,j,len,errpos,head_length,base_pos,info_length,keys, key_parts,unique_key_parts,fulltext_keys,uniques; char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], data_name[FN_REFLEN]; @@ -84,13 +91,13 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) MARIA_HA info,*m_info,*old_info; MARIA_SHARE share_buff,*share; ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; - my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MARIA_MAX_KEY_BLOCK_SIZE]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY]; ulonglong max_key_file_length, max_data_file_length; DBUG_ENTER("maria_open"); LINT_INIT(m_info); kfile= -1; - errpos=0; + errpos= 0; head_length=sizeof(share_buff.state.header); bzero((byte*) &info,sizeof(info)); @@ -103,7 +110,6 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) bzero((gptr) &share_buff,sizeof(share_buff)); share_buff.state.rec_per_key_part=rec_per_key_part; share_buff.state.key_root=key_root; - share_buff.state.key_del=key_del; share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), maria_key_cache); @@ -121,7 +127,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) goto err; } share->mode=open_mode; - errpos=1; + errpos= 1; if (my_read(kfile,(char*) share->state.header.file_version,head_length, MYF(MY_NABP))) { @@ -165,17 +171,17 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS); info_length=mi_uint2korr(share->state.header.header_length); - base_pos=mi_uint2korr(share->state.header.base_pos); + base_pos= mi_uint2korr(share->state.header.base_pos); if (!(disk_cache=(char*) my_alloca(info_length+128))) { my_errno=ENOMEM; goto err; } end_pos=disk_cache+info_length; - errpos=2; + errpos= 2; VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0))); - errpos=3; + errpos= 3; if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP))) { my_errno=HA_ERR_CRASHED; @@ -195,15 +201,14 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } share->state_diff_length=len-MARIA_STATE_INFO_SIZE; - _ma_state_info_read((uchar*) disk_cache, &share->state); + _ma_state_info_read(disk_cache, &share->state); len= mi_uint2korr(share->state.header.base_info_length); if (len != MARIA_BASE_INFO_SIZE) { DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d", len,MARIA_BASE_INFO_SIZE)); } - disk_pos= (char*) - _ma_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); + disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base); share->state.state_length=base_pos; if (!(open_flags & HA_OPEN_FOR_REPAIR) && @@ -239,31 +244,13 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) (((ulonglong) 1 << (share->base.rec_reflength*8))-1) : (_ma_safe_mul(share->base.pack_reclength, (ulonglong) 1 << (share->base.rec_reflength*8))-1); + max_key_file_length= _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH, ((ulonglong) 1 << (share->base.key_reflength*8))-1); #if SIZEOF_OFF_T == 4 set_if_smaller(max_data_file_length, INT_MAX32); set_if_smaller(max_key_file_length, INT_MAX32); -#endif -#if USE_RAID && SYSTEM_SIZEOF_OFF_T == 4 - set_if_smaller(max_key_file_length, INT_MAX32); - if (!share->base.raid_type) - { - set_if_smaller(max_data_file_length, INT_MAX32); - } - else - { - set_if_smaller(max_data_file_length, - (ulonglong) share->base.raid_chunks << 31); - } -#elif !defined(USE_RAID) - if (share->base.raid_type) - { - DBUG_PRINT("error",("Table uses RAID but we don't have RAID support")); - my_errno=HA_ERR_UNSUPPORTED; - goto err; - } #endif share->base.max_data_file_length=(my_off_t) max_data_file_length; share->base.max_key_file_length=(my_off_t) max_key_file_length; @@ -286,29 +273,25 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) &share->index_file_name,strlen(index_name)+1, &share->data_file_name,strlen(data_name)+1, &share->state.key_root,keys*sizeof(my_off_t), - &share->state.key_del, - (share->state.header.max_block_size_index*sizeof(my_off_t)), #ifdef THREAD &share->key_root_lock,sizeof(rw_lock_t)*keys, #endif &share->mmap_lock,sizeof(rw_lock_t), NullS)) goto err; - errpos=4; + errpos= 4; + *share=share_buff; memcpy((char*) share->state.rec_per_key_part, (char*) rec_per_key_part, sizeof(long)*key_parts); memcpy((char*) share->state.key_root, (char*) key_root, sizeof(my_off_t)*keys); - memcpy((char*) share->state.key_del, - (char*) key_del, (sizeof(my_off_t) * - share->state.header.max_block_size_index)); strmov(share->unique_file_name, name_buff); share->unique_name_length= strlen(name_buff); strmov(share->index_file_name, index_name); strmov(share->data_file_name, data_name); - share->blocksize=min(IO_SIZE,maria_block_size); + share->block_size= maria_block_size; { HA_KEYSEG *pos=share->keyparts; for (i=0 ; i < keys ; i++) @@ -319,7 +302,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) end_pos); if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) have_rtree=1; - set_if_smaller(share->blocksize,share->keyinfo[i].block_length); + set_if_smaller(share->block_size,share->keyinfo[i].block_length); share->keyinfo[i].seg=pos; for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) { @@ -423,28 +406,43 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) } share->ftparsers= 0; } - - disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, end_pos); - for (i=j=offset=0 ; i < share->base.fields ; i++) + share->data_file_type= share->state.header.data_file_type; + share->base_length= (BASE_ROW_HEADER_SIZE + + share->base.is_nulls_extended + + share->base.null_bytes + + share->base.pack_bytes + + test(share->options & HA_OPTION_CHECKSUM)); + if (share->base.transactional) + share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; + share->base.default_rec_buff_size= max(share->base.pack_reclength, + share->base.max_key_length); + if (share->data_file_type == DYNAMIC_RECORD) + { + share->base.extra_rec_buff_size= + (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH + + MARIA_REC_BUFF_OFFSET); + share->base.default_rec_buff_size+= share->base.extra_rec_buff_size; + } + disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, + end_pos); + for (i= j= 0 ; i < share->base.fields ; i++) { disk_pos=_ma_recinfo_read(disk_pos,&share->rec[i]); share->rec[i].pack_type=0; share->rec[i].huff_tree=0; - share->rec[i].offset=offset; if (share->rec[i].type == (int) FIELD_BLOB) { share->blobs[j].pack_length= share->rec[i].length-maria_portable_sizeof_char_ptr;; - share->blobs[j].offset=offset; + share->blobs[j].offset= share->rec[i].offset; j++; } - offset+=share->rec[i].length; } share->rec[i].type=(int) FIELD_LAST; /* End marker */ if (_ma_open_datafile(&info, share, -1)) goto err; - errpos=5; + errpos= 5; share->kfile=kfile; share->this_process=(ulong) getpid(); @@ -456,25 +454,12 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->rec_reflength=share->base.rec_reflength; /* May be changed */ share->base.margin_key_file_length=(share->base.max_key_file_length - (keys ? MARIA_INDEX_BLOCK_MARGIN * - share->blocksize * keys : 0)); - share->blocksize=min(IO_SIZE,maria_block_size); - share->data_file_type=STATIC_RECORD; - if (share->options & HA_OPTION_COMPRESS_RECORD) - { - share->data_file_type = COMPRESSED_RECORD; - share->options|= HA_OPTION_READ_ONLY_DATA; - info.s=share; - if (_ma_read_pack_info(&info, - (pbool) - test(!(share->options & - (HA_OPTION_PACK_RECORD | - HA_OPTION_TEMP_COMPRESS_RECORD))))) - goto err; - } - else if (share->options & HA_OPTION_PACK_RECORD) - share->data_file_type = DYNAMIC_RECORD; + share->block_size * keys : 0)); + share->block_size= share->base.block_size; my_afree((gptr) disk_cache); _ma_setup_functions(share); + if ((*share->once_init)(share, info.dfile)) + goto err; #ifdef THREAD thr_lock_init(&share->lock); VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST)); @@ -493,6 +478,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) HA_OPTION_COMPRESS_RECORD | HA_OPTION_TEMP_COMPRESS_RECORD)) || (open_flags & HA_OPEN_TMP_TABLE) || + share->data_file_type == BLOCK_RECORD || have_rtree) ? 0 : 1; if (share->concurrent_insert) { @@ -512,9 +498,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) my_errno=EACCES; /* Can't open in write mode */ goto err; } - if (_ma_open_datafile(&info, share, old_info->dfile)) + if (share->data_file_type == BLOCK_RECORD) + info.dfile= share->bitmap.file; + else if (_ma_open_datafile(&info, share, old_info->dfile)) goto err; - errpos=5; + errpos= 5; have_rtree= old_info->maria_rtree_recursion_state != NULL; } @@ -530,7 +518,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) &info.maria_rtree_recursion_state,have_rtree ? 1024 : 0, NullS)) goto err; - errpos=6; + errpos= 6; if (!have_rtree) info.maria_rtree_recursion_state= NULL; @@ -540,7 +528,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) info.lastkey2=info.lastkey+share->base.max_key_length; info.s=share; - info.lastpos= HA_OFFSET_ERROR; + info.cur_row.lastpos= HA_OFFSET_ERROR; info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND); info.opt_flag=READ_CHECK_USED; info.this_unique= (ulong) info.dfile; /* Uniq number in process */ @@ -557,8 +545,12 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) info.ft1_to_ft2=0; info.errkey= -1; info.page_changed=1; + info.keyread_buff= info.buff + share->base.max_key_block_length; + if ((*share->init)(&info)) + goto err; + pthread_mutex_lock(&share->intern_lock); - info.read_record=share->read_record; + info.read_record= share->read_record; share->reopen++; share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL); if (share->options & HA_OPTION_READ_ONLY_DATA) @@ -570,7 +562,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) if ((open_flags & HA_OPEN_TMP_TABLE) || (share->options & HA_OPTION_TMP_TABLE)) { - share->temporary=share->delay_key_write=1; + share->temporary= share->delay_key_write= 1; + share->write_flag=MYF(MY_NABP); share->w_locks++; /* We don't have to update status */ share->tot_locks++; @@ -580,15 +573,17 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) (share->options & HA_OPTION_DELAY_KEY_WRITE)) && maria_delay_key_write) share->delay_key_write=1; + info.state= &share->state.state; /* Change global values by default */ pthread_mutex_unlock(&share->intern_lock); /* Allocate buffer for one record */ - - /* prerequisites: bzero(info) && info->s=share; are met. */ - if (!_ma_alloc_rec_buff(&info, -1, &info.rec_buff)) + /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */ + if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size, + share->base.default_rec_buff_size)) goto err; - bzero(info.rec_buff, _ma_get_rec_buff_len(&info, info.rec_buff)); + + bzero(info.rec_buff, share->base.default_rec_buff_size); *m_info=info; #ifdef THREAD @@ -608,12 +603,15 @@ err: _ma_report_error(save_errno, name); switch (errpos) { case 6: + (*share->end)(&info); my_free((gptr) m_info,MYF(0)); /* fall through */ case 5: - VOID(my_close(info.dfile,MYF(0))); + if (share->data_file_type != BLOCK_RECORD) + VOID(my_close(info.dfile,MYF(0))); if (old_info) break; /* Don't remove open table */ + (*share->once_end)(share); /* fall through */ case 4: my_free((gptr) share,MYF(0)); @@ -636,38 +634,23 @@ err: } /* maria_open */ -byte *_ma_alloc_rec_buff(MARIA_HA *info, ulong length, byte **buf) +/* + Reallocate a buffer, if the current buffer is not large enough +*/ + +my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size) { - uint extra; - uint32 old_length; - LINT_INIT(old_length); - - if (! *buf || length > (old_length=_ma_get_rec_buff_len(info, *buf))) + if (*old_size < new_size) { - byte *newptr = *buf; - - /* to simplify initial init of info->rec_buf in maria_open and maria_extra */ - if (length == (ulong) -1) - { - length= max(info->s->base.pack_reclength, - info->s->base.max_key_length); - /* Avoid unnecessary realloc */ - if (newptr && length == old_length) - return newptr; - } - - extra= ((info->s->options & HA_OPTION_PACK_RECORD) ? - ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ - MARIA_REC_BUFF_OFFSET : 0); - if (extra && newptr) - newptr-= MARIA_REC_BUFF_OFFSET; - if (!(newptr=(byte*) my_realloc((gptr)newptr, length+extra+8, - MYF(MY_ALLOW_ZERO_PTR)))) - return newptr; - *((uint32 *) newptr)= (uint32) length; - *buf= newptr+(extra ? MARIA_REC_BUFF_OFFSET : 0); + byte *addr; + if (!(addr= (byte*) my_realloc((gptr) *old_addr, new_size, + MYF(MY_ALLOW_ZERO_PTR)))) + return 1; + *old_addr= addr; + *old_size= new_size; } - return *buf; + return 0; } @@ -684,28 +667,37 @@ ulonglong _ma_safe_mul(ulonglong a, ulonglong b) void _ma_setup_functions(register MARIA_SHARE *share) { - if (share->options & HA_OPTION_COMPRESS_RECORD) - { + share->once_init= maria_once_init_dummy; + share->once_end= maria_once_end_dummy; + share->init= maria_scan_init_dummy; + share->end= maria_scan_end_dummy; + share->scan_init= maria_scan_init_dummy; + share->scan_end= maria_scan_end_dummy; + share->write_record_init= _ma_write_init_default; + share->write_record_abort= _ma_write_abort_default; + + switch (share->data_file_type) { + case COMPRESSED_RECORD: share->read_record= _ma_read_pack_record; - share->read_rnd= _ma_read_rnd_pack_record; - if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD)) - share->calc_checksum=0; /* No checksum */ - else if (share->options & HA_OPTION_PACK_RECORD) - share->calc_checksum= _ma_checksum; - else + share->scan= _ma_read_rnd_pack_record; + share->once_init= _ma_once_init_pack_row; + share->once_end= _ma_once_end_pack_row; + /* Calculate checksum according how the original row was stored */ + if (share->state.header.org_data_file_type == STATIC_RECORD) share->calc_checksum= _ma_static_checksum; - } - else if (share->options & HA_OPTION_PACK_RECORD) - { + else + share->calc_checksum= _ma_checksum; + share->calc_write_checksum= share->calc_checksum; + break; + case DYNAMIC_RECORD: share->read_record= _ma_read_dynamic_record; - share->read_rnd= _ma_read_rnd_dynamic_record; + share->scan= _ma_read_rnd_dynamic_record; share->delete_record= _ma_delete_dynamic_record; share->compare_record= _ma_cmp_dynamic_record; share->compare_unique= _ma_cmp_dynamic_unique; - share->calc_checksum= _ma_checksum; - + share->calc_checksum= share->calc_write_checksum= _ma_checksum; /* add bits used to pack data to pack_reclength for faster allocation */ - share->base.pack_reclength+= share->base.pack_bits; + share->base.pack_reclength+= share->base.pack_bytes; if (share->base.blobs) { share->update_record= _ma_update_blob_record; @@ -716,22 +708,42 @@ void _ma_setup_functions(register MARIA_SHARE *share) share->write_record= _ma_write_dynamic_record; share->update_record= _ma_update_dynamic_record; } - } - else - { + break; + case STATIC_RECORD: share->read_record= _ma_read_static_record; - share->read_rnd= _ma_read_rnd_static_record; + share->scan= _ma_read_rnd_static_record; share->delete_record= _ma_delete_static_record; share->compare_record= _ma_cmp_static_record; share->update_record= _ma_update_static_record; share->write_record= _ma_write_static_record; share->compare_unique= _ma_cmp_static_unique; - share->calc_checksum= _ma_static_checksum; + share->calc_checksum= share->calc_write_checksum= _ma_static_checksum; + break; + case BLOCK_RECORD: + share->once_init= _ma_once_init_block_row; + share->once_end= _ma_once_end_block_row; + share->init= _ma_init_block_row; + share->end= _ma_end_block_row; + share->write_record_init= _ma_write_init_block_record; + share->write_record_abort= _ma_write_abort_block_record; + share->scan_init= _ma_scan_init_block_record; + share->scan_end= _ma_scan_end_block_record; + share->read_record= _ma_read_block_record; + share->scan= _ma_scan_block_record; + share->delete_record= _ma_delete_block_record; + share->compare_record= _ma_compare_block_record; + share->update_record= _ma_update_block_record; + share->write_record= _ma_write_block_record; + share->compare_unique= _ma_cmp_block_unique; + share->calc_checksum= _ma_checksum; + share->calc_write_checksum= 0; + break; } share->file_read= _ma_nommap_pread; share->file_write= _ma_nommap_pwrite; - if (!(share->options & HA_OPTION_CHECKSUM)) - share->calc_checksum=0; + if (!(share->options & HA_OPTION_CHECKSUM) && + share->data_file_type != COMPRESSED_RECORD) + share->calc_checksum= share->calc_write_checksum= 0; return; } @@ -798,55 +810,53 @@ uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) { uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; uchar *ptr=buff; - uint i, keys= (uint) state->header.keys, - key_blocks=state->header.max_block_size_index; + uint i, keys= (uint) state->header.keys; DBUG_ENTER("_ma_state_info_write"); memcpy_fixed(ptr,&state->header,sizeof(state->header)); ptr+=sizeof(state->header); /* open_count must be first because of _ma_mark_file_changed ! */ - mi_int2store(ptr,state->open_count); ptr +=2; - *ptr++= (uchar)state->changed; *ptr++= state->sortkey; - mi_rowstore(ptr,state->state.records); ptr +=8; - mi_rowstore(ptr,state->state.del); ptr +=8; - mi_rowstore(ptr,state->split); ptr +=8; - mi_sizestore(ptr,state->dellink); ptr +=8; - mi_sizestore(ptr,state->state.key_file_length); ptr +=8; - mi_sizestore(ptr,state->state.data_file_length); ptr +=8; - mi_sizestore(ptr,state->state.empty); ptr +=8; - mi_sizestore(ptr,state->state.key_empty); ptr +=8; - mi_int8store(ptr,state->auto_increment); ptr +=8; - mi_int8store(ptr,(ulonglong) state->state.checksum);ptr +=8; - mi_int4store(ptr,state->process); ptr +=4; - mi_int4store(ptr,state->unique); ptr +=4; - mi_int4store(ptr,state->status); ptr +=4; - mi_int4store(ptr,state->update_count); ptr +=4; + mi_int2store(ptr,state->open_count); ptr+= 2; + *ptr++= (uchar)state->changed; + *ptr++= state->sortkey; + mi_rowstore(ptr,state->state.records); ptr+= 8; + mi_rowstore(ptr,state->state.del); ptr+= 8; + mi_rowstore(ptr,state->split); ptr+= 8; + mi_sizestore(ptr,state->dellink); ptr+= 8; + mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8; + mi_sizestore(ptr,state->state.key_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.data_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.empty); ptr+= 8; + mi_sizestore(ptr,state->state.key_empty); ptr+= 8; + mi_int8store(ptr,state->auto_increment); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8; + mi_int4store(ptr,state->process); ptr+= 4; + mi_int4store(ptr,state->unique); ptr+= 4; + mi_int4store(ptr,state->status); ptr+= 4; + mi_int4store(ptr,state->update_count); ptr+= 4; - ptr+=state->state_diff_length; + ptr+= state->state_diff_length; for (i=0; i < keys; i++) { - mi_sizestore(ptr,state->key_root[i]); ptr +=8; + mi_sizestore(ptr,state->key_root[i]); ptr+= 8; } - for (i=0; i < key_blocks; i++) - { - mi_sizestore(ptr,state->key_del[i]); ptr +=8; - } - if (pWrite & 2) /* From isamchk */ + mi_sizestore(ptr,state->key_del); ptr+= 8; + if (pWrite & 2) /* From maria_chk */ { uint key_parts= mi_uint2korr(state->header.key_parts); - mi_int4store(ptr,state->sec_index_changed); ptr +=4; - mi_int4store(ptr,state->sec_index_used); ptr +=4; - mi_int4store(ptr,state->version); ptr +=4; - mi_int8store(ptr,state->key_map); ptr +=8; - mi_int8store(ptr,(ulonglong) state->create_time); ptr +=8; - mi_int8store(ptr,(ulonglong) state->recover_time); ptr +=8; - mi_int8store(ptr,(ulonglong) state->check_time); ptr +=8; - mi_sizestore(ptr,state->rec_per_key_rows); ptr+=8; + mi_int4store(ptr,state->sec_index_changed); ptr+= 4; + mi_int4store(ptr,state->sec_index_used); ptr+= 4; + mi_int4store(ptr,state->version); ptr+= 4; + mi_int8store(ptr,state->key_map); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8; + mi_sizestore(ptr,state->rec_per_key_rows); ptr+= 8; for (i=0 ; i < key_parts ; i++) { - mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4; + mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4; } } @@ -858,54 +868,51 @@ uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) } -uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state) +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state) { - uint i,keys,key_parts,key_blocks; + uint i,keys,key_parts; memcpy_fixed(&state->header,ptr, sizeof(state->header)); - ptr +=sizeof(state->header); - keys=(uint) state->header.keys; - key_parts=mi_uint2korr(state->header.key_parts); - key_blocks=state->header.max_block_size_index; + ptr+= sizeof(state->header); + keys= (uint) state->header.keys; + key_parts= mi_uint2korr(state->header.key_parts); - state->open_count = mi_uint2korr(ptr); ptr +=2; - state->changed= (bool) *ptr++; - state->sortkey = (uint) *ptr++; - state->state.records= mi_rowkorr(ptr); ptr +=8; - state->state.del = mi_rowkorr(ptr); ptr +=8; - state->split = mi_rowkorr(ptr); ptr +=8; - state->dellink= mi_sizekorr(ptr); ptr +=8; - state->state.key_file_length = mi_sizekorr(ptr); ptr +=8; - state->state.data_file_length= mi_sizekorr(ptr); ptr +=8; - state->state.empty = mi_sizekorr(ptr); ptr +=8; - state->state.key_empty= mi_sizekorr(ptr); ptr +=8; - state->auto_increment=mi_uint8korr(ptr); ptr +=8; - state->state.checksum=(ha_checksum) mi_uint8korr(ptr); ptr +=8; - state->process= mi_uint4korr(ptr); ptr +=4; - state->unique = mi_uint4korr(ptr); ptr +=4; - state->status = mi_uint4korr(ptr); ptr +=4; - state->update_count=mi_uint4korr(ptr); ptr +=4; + state->open_count = mi_uint2korr(ptr); ptr+= 2; + state->changed= (my_bool) *ptr++; + state->sortkey= (uint) *ptr++; + state->state.records= mi_rowkorr(ptr); ptr+= 8; + state->state.del = mi_rowkorr(ptr); ptr+= 8; + state->split = mi_rowkorr(ptr); ptr+= 8; + state->dellink= mi_sizekorr(ptr); ptr+= 8; + state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8; + state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8; + state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8; + state->state.empty = mi_sizekorr(ptr); ptr+= 8; + state->state.key_empty= mi_sizekorr(ptr); ptr+= 8; + state->auto_increment=mi_uint8korr(ptr); ptr+= 8; + state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8; + state->process= mi_uint4korr(ptr); ptr+= 4; + state->unique = mi_uint4korr(ptr); ptr+= 4; + state->status = mi_uint4korr(ptr); ptr+= 4; + state->update_count=mi_uint4korr(ptr); ptr+= 4; ptr+= state->state_diff_length; for (i=0; i < keys; i++) { - state->key_root[i]= mi_sizekorr(ptr); ptr +=8; + state->key_root[i]= mi_sizekorr(ptr); ptr+= 8; } - for (i=0; i < key_blocks; i++) - { - state->key_del[i] = mi_sizekorr(ptr); ptr +=8; - } - state->sec_index_changed = mi_uint4korr(ptr); ptr +=4; - state->sec_index_used = mi_uint4korr(ptr); ptr +=4; - state->version = mi_uint4korr(ptr); ptr +=4; - state->key_map = mi_uint8korr(ptr); ptr +=8; - state->create_time = (time_t) mi_sizekorr(ptr); ptr +=8; - state->recover_time =(time_t) mi_sizekorr(ptr); ptr +=8; - state->check_time = (time_t) mi_sizekorr(ptr); ptr +=8; - state->rec_per_key_rows=mi_sizekorr(ptr); ptr +=8; + state->key_del= mi_sizekorr(ptr); ptr+= 8; + state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4; + state->sec_index_used = mi_uint4korr(ptr); ptr+= 4; + state->version = mi_uint4korr(ptr); ptr+= 4; + state->key_map = mi_uint8korr(ptr); ptr+= 8; + state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8; + state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->rec_per_key_rows=mi_sizekorr(ptr); ptr+= 8; for (i=0 ; i < key_parts ; i++) { - state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4; + state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4; } return ptr; } @@ -924,7 +931,7 @@ uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead) } else if (my_read(file, buff, state->state_length,MYF(MY_NABP))) return (MY_FILE_ERROR); - _ma_state_info_read((uchar*) buff, state); + _ma_state_info_read(buff, state); } return 0; } @@ -938,74 +945,84 @@ uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) { uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff; - mi_sizestore(ptr,base->keystart); ptr +=8; - mi_sizestore(ptr,base->max_data_file_length); ptr +=8; - mi_sizestore(ptr,base->max_key_file_length); ptr +=8; - mi_rowstore(ptr,base->records); ptr +=8; - mi_rowstore(ptr,base->reloc); ptr +=8; - mi_int4store(ptr,base->mean_row_length); ptr +=4; - mi_int4store(ptr,base->reclength); ptr +=4; - mi_int4store(ptr,base->pack_reclength); ptr +=4; - mi_int4store(ptr,base->min_pack_length); ptr +=4; - mi_int4store(ptr,base->max_pack_length); ptr +=4; - mi_int4store(ptr,base->min_block_length); ptr +=4; - mi_int4store(ptr,base->fields); ptr +=4; - mi_int4store(ptr,base->pack_fields); ptr +=4; - *ptr++=base->rec_reflength; - *ptr++=base->key_reflength; - *ptr++=base->keys; - *ptr++=base->auto_key; - mi_int2store(ptr,base->pack_bits); ptr +=2; - mi_int2store(ptr,base->blobs); ptr +=2; - mi_int2store(ptr,base->max_key_block_length); ptr +=2; - mi_int2store(ptr,base->max_key_length); ptr +=2; - mi_int2store(ptr,base->extra_alloc_bytes); ptr +=2; + mi_sizestore(ptr,base->keystart); ptr+= 8; + mi_sizestore(ptr,base->max_data_file_length); ptr+= 8; + mi_sizestore(ptr,base->max_key_file_length); ptr+= 8; + mi_rowstore(ptr,base->records); ptr+= 8; + mi_rowstore(ptr,base->reloc); ptr+= 8; + mi_int4store(ptr,base->mean_row_length); ptr+= 4; + mi_int4store(ptr,base->reclength); ptr+= 4; + mi_int4store(ptr,base->pack_reclength); ptr+= 4; + mi_int4store(ptr,base->min_pack_length); ptr+= 4; + mi_int4store(ptr,base->max_pack_length); ptr+= 4; + mi_int4store(ptr,base->min_block_length); ptr+= 4; + mi_int2store(ptr,base->fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2; + mi_int2store(ptr,base->max_field_lengths); ptr+= 2; + mi_int2store(ptr,base->pack_fields); ptr+= 2; + mi_int2store(ptr,0); ptr+= 2; + mi_int2store(ptr,base->null_bytes); ptr+= 2; + mi_int2store(ptr,base->original_null_bytes); ptr+= 2; + mi_int2store(ptr,base->field_offsets); ptr+= 2; + mi_int2store(ptr,base->min_row_length); ptr+= 2; + mi_int2store(ptr,base->block_size); ptr+= 2; + *ptr++= base->rec_reflength; + *ptr++= base->key_reflength; + *ptr++= base->keys; + *ptr++= base->auto_key; + *ptr++= base->transactional; + *ptr++= 0; /* Reserved */ + mi_int2store(ptr,base->pack_bytes); ptr+= 2; + mi_int2store(ptr,base->blobs); ptr+= 2; + mi_int2store(ptr,base->max_key_block_length); ptr+= 2; + mi_int2store(ptr,base->max_key_length); ptr+= 2; + mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2; *ptr++= base->extra_alloc_procent; - *ptr++= base->raid_type; - mi_int2store(ptr,base->raid_chunks); ptr +=2; - mi_int4store(ptr,base->raid_chunksize); ptr +=4; - bzero(ptr,6); ptr +=6; /* extra */ + bzero(ptr,16); ptr+= 16; /* extra */ + DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE); return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); } -uchar *_ma_n_base_info_read(uchar *ptr, MARIA_BASE_INFO *base) +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base) { - base->keystart = mi_sizekorr(ptr); ptr +=8; - base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; - base->max_key_file_length = mi_sizekorr(ptr); ptr +=8; - base->records = (ha_rows) mi_sizekorr(ptr); ptr +=8; - base->reloc = (ha_rows) mi_sizekorr(ptr); ptr +=8; - base->mean_row_length = mi_uint4korr(ptr); ptr +=4; - base->reclength = mi_uint4korr(ptr); ptr +=4; - base->pack_reclength = mi_uint4korr(ptr); ptr +=4; - base->min_pack_length = mi_uint4korr(ptr); ptr +=4; - base->max_pack_length = mi_uint4korr(ptr); ptr +=4; - base->min_block_length = mi_uint4korr(ptr); ptr +=4; - base->fields = mi_uint4korr(ptr); ptr +=4; - base->pack_fields = mi_uint4korr(ptr); ptr +=4; + base->keystart= mi_sizekorr(ptr); ptr+= 8; + base->max_data_file_length= mi_sizekorr(ptr); ptr+= 8; + base->max_key_file_length= mi_sizekorr(ptr); ptr+= 8; + base->records= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->reloc= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->mean_row_length= mi_uint4korr(ptr); ptr+= 4; + base->reclength= mi_uint4korr(ptr); ptr+= 4; + base->pack_reclength= mi_uint4korr(ptr); ptr+= 4; + base->min_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->max_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->min_block_length= mi_uint4korr(ptr); ptr+= 4; + base->fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2; + base->max_field_lengths= mi_uint2korr(ptr); ptr+= 2; + base->pack_fields= mi_uint2korr(ptr); ptr+= 2; + ptr+= 2; + base->null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->original_null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->field_offsets= mi_uint2korr(ptr); ptr+= 2; + base->min_row_length= mi_uint2korr(ptr); ptr+= 2; + base->block_size= mi_uint2korr(ptr); ptr+= 2; - base->rec_reflength = *ptr++; - base->key_reflength = *ptr++; - base->keys= *ptr++; - base->auto_key= *ptr++; - base->pack_bits = mi_uint2korr(ptr); ptr +=2; - base->blobs = mi_uint2korr(ptr); ptr +=2; - base->max_key_block_length= mi_uint2korr(ptr); ptr +=2; - base->max_key_length = mi_uint2korr(ptr); ptr +=2; - base->extra_alloc_bytes = mi_uint2korr(ptr); ptr +=2; - base->extra_alloc_procent = *ptr++; - base->raid_type= *ptr++; - base->raid_chunks= mi_uint2korr(ptr); ptr +=2; - base->raid_chunksize= mi_uint4korr(ptr); ptr +=4; - /* TO BE REMOVED: Fix for old RAID files */ - if (base->raid_type == 0) - { - base->raid_chunks=0; - base->raid_chunksize=0; - } - - ptr+=6; + base->rec_reflength= *ptr++; + base->key_reflength= *ptr++; + base->keys= *ptr++; + base->auto_key= *ptr++; + base->transactional= *ptr++; + ptr++; + base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; + base->blobs= mi_uint2korr(ptr); ptr+= 2; + base->max_key_block_length= mi_uint2korr(ptr); ptr+= 2; + base->max_key_length= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_bytes= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_procent= *ptr++; + ptr+= 16; return ptr; } @@ -1018,13 +1035,13 @@ uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef) uchar buff[MARIA_KEYDEF_SIZE]; uchar *ptr=buff; - *ptr++ = (uchar) keydef->keysegs; - *ptr++ = keydef->key_alg; /* Rtree or Btree */ - mi_int2store(ptr,keydef->flag); ptr +=2; - mi_int2store(ptr,keydef->block_length); ptr +=2; - mi_int2store(ptr,keydef->keylength); ptr +=2; - mi_int2store(ptr,keydef->minlength); ptr +=2; - mi_int2store(ptr,keydef->maxlength); ptr +=2; + *ptr++= (uchar) keydef->keysegs; + *ptr++= keydef->key_alg; /* Rtree or Btree */ + mi_int2store(ptr,keydef->flag); ptr+= 2; + mi_int2store(ptr,keydef->block_length); ptr+= 2; + mi_int2store(ptr,keydef->keylength); ptr+= 2; + mi_int2store(ptr,keydef->minlength); ptr+= 2; + mi_int2store(ptr,keydef->maxlength); ptr+= 2; return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); } @@ -1033,12 +1050,11 @@ char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef) keydef->keysegs = (uint) *ptr++; keydef->key_alg = *ptr++; /* Rtree or Btree */ - keydef->flag = mi_uint2korr(ptr); ptr +=2; - keydef->block_length = mi_uint2korr(ptr); ptr +=2; - keydef->keylength = mi_uint2korr(ptr); ptr +=2; - keydef->minlength = mi_uint2korr(ptr); ptr +=2; - keydef->maxlength = mi_uint2korr(ptr); ptr +=2; - keydef->block_size_index = keydef->block_length/MARIA_MIN_KEY_BLOCK_LENGTH-1; + keydef->flag = mi_uint2korr(ptr); ptr+= 2; + keydef->block_length = mi_uint2korr(ptr); ptr+= 2; + keydef->keylength = mi_uint2korr(ptr); ptr+= 2; + keydef->minlength = mi_uint2korr(ptr); ptr+= 2; + keydef->maxlength = mi_uint2korr(ptr); ptr+= 2; keydef->underflow_block_length=keydef->block_length/3; keydef->version = 0; /* Not saved */ keydef->parser = &ft_default_parser; @@ -1062,9 +1078,9 @@ int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg) *ptr++= keyseg->bit_start; *ptr++= keyseg->bit_end; *ptr++= keyseg->bit_length; - mi_int2store(ptr,keyseg->flag); ptr+=2; - mi_int2store(ptr,keyseg->length); ptr+=2; - mi_int4store(ptr,keyseg->start); ptr+=4; + mi_int2store(ptr,keyseg->flag); ptr+= 2; + mi_int2store(ptr,keyseg->length); ptr+= 2; + mi_int4store(ptr,keyseg->start); ptr+= 4; pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos; mi_int4store(ptr, pos); ptr+=4; @@ -1081,10 +1097,10 @@ char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg) keyseg->bit_start = *ptr++; keyseg->bit_end = *ptr++; keyseg->bit_length = *ptr++; - keyseg->flag = mi_uint2korr(ptr); ptr +=2; - keyseg->length = mi_uint2korr(ptr); ptr +=2; - keyseg->start = mi_uint4korr(ptr); ptr +=4; - keyseg->null_pos = mi_uint4korr(ptr); ptr +=4; + keyseg->flag = mi_uint2korr(ptr); ptr+= 2; + keyseg->length = mi_uint2korr(ptr); ptr+= 2; + keyseg->start = mi_uint4korr(ptr); ptr+= 4; + keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4; keyseg->charset=0; /* Will be filled in later */ if (keyseg->null_bit) keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7)); @@ -1129,47 +1145,44 @@ uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo) uchar buff[MARIA_COLUMNDEF_SIZE]; uchar *ptr=buff; - mi_int2store(ptr,recinfo->type); ptr +=2; - mi_int2store(ptr,recinfo->length); ptr +=2; - *ptr++ = recinfo->null_bit; - mi_int2store(ptr,recinfo->null_pos); ptr+= 2; + mi_int6store(ptr,recinfo->offset); ptr+= 6; + mi_int2store(ptr,recinfo->type); ptr+= 2; + mi_int2store(ptr,recinfo->length); ptr+= 2; + mi_int2store(ptr,recinfo->fill_length); ptr+= 2; + mi_int2store(ptr,recinfo->null_pos); ptr+= 2; + mi_int2store(ptr,recinfo->empty_pos); ptr+= 2; + (*ptr++)= recinfo->null_bit; + (*ptr++)= recinfo->empty_bit; return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); } char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo) { - recinfo->type= mi_sint2korr(ptr); ptr +=2; - recinfo->length=mi_uint2korr(ptr); ptr +=2; - recinfo->null_bit= (uint8) *ptr++; - recinfo->null_pos=mi_uint2korr(ptr); ptr +=2; - return ptr; + recinfo->offset= mi_uint6korr(ptr); ptr+= 6; + recinfo->type= mi_sint2korr(ptr); ptr+= 2; + recinfo->length= mi_uint2korr(ptr); ptr+= 2; + recinfo->fill_length= mi_uint2korr(ptr); ptr+= 2; + recinfo->null_pos= mi_uint2korr(ptr); ptr+= 2; + recinfo->empty_pos= mi_uint2korr(ptr); ptr+= 2; + recinfo->null_bit= (uint8) *ptr++; + recinfo->empty_bit= (uint8) *ptr++; + return ptr; } /************************************************************************** -Open data file with or without RAID -We can't use dup() here as the data file descriptors need to have different -active seek-positions. + Open data file + We can't use dup() here as the data file descriptors need to have different + active seek-positions. -The argument file_to_dup is here for the future if there would on some OS -exist a dup()-like call that would give us two different file descriptors. + The argument file_to_dup is here for the future if there would on some OS + exist a dup()-like call that would give us two different file descriptors. *************************************************************************/ -int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup __attribute__((unused))) +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, + File file_to_dup __attribute__((unused))) { -#ifdef USE_RAID - if (share->base.raid_type) - { - info->dfile=my_raid_open(share->data_file_name, - share->mode | O_SHARE, - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize, - MYF(MY_WME | MY_RAID)); - } - else -#endif - info->dfile=my_open(share->data_file_name, share->mode | O_SHARE, - MYF(MY_WME)); + info->dfile= my_open(share->data_file_name, share->mode | O_SHARE, + MYF(MY_WME)); return info->dfile >= 0 ? 0 : 1; } @@ -1264,3 +1277,25 @@ int maria_indexes_are_disabled(MARIA_HA *info) return (! maria_is_any_key_active(share->state.key_map) && share->base.keys); } + + +static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + +static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused))) +{ +} + +static my_bool maria_once_init_dummy(MARIA_SHARE *share + __attribute__((unused)), + File dfile __attribute__((unused))) +{ + return 0; +} + +static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused))) +{ + return 0; +} diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index eb99e299f9a..69d2f15ca16 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -44,7 +44,10 @@ #define OFFSET_TABLE_SIZE 512 -static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,MARIA_DECODE_TREE *decode_tree, +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys); +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, uint16 **decode_table,byte **intervall_buff, uint16 *tmp_buff); static void make_quick_table(uint16 *to_table,uint16 *decode_table, @@ -56,55 +59,61 @@ static uint copy_decode_table(uint16 *to_pos,uint offset, uint16 *decode_table); static uint find_longest_bitstream(uint16 *table, uint16 *end); static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field, - MARIA_BIT_BUFF *buff, - uchar *to, - uchar *end); -static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + MARIA_BIT_BUFF *buff, + byte *to, + byte *end); +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); -static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end); + byte *to,byte *end); +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); -static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end); + byte *to,byte *end); +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); + byte *to,byte *end); static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end); + byte *to, byte *end); static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end); + byte *to, byte *end); static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end); + byte *to, byte *end); static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, - uchar *to,uchar *end); -static uint decode_pos(MARIA_BIT_BUFF *bit_buff,MARIA_DECODE_TREE *decode_tree); -static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer,uint length); + byte *to,byte *end); +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree); +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer, + uint length); static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MARIA_BIT_BUFF *bit_buff); static uint max_bit(uint value); static uint read_pack_length(uint version, const uchar *buf, ulong *length); #ifdef HAVE_MMAP -static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BLOCK_INFO *info, uchar *header); #endif @@ -121,21 +130,43 @@ static maria_bit_type mask[]= 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff, #endif - }; +}; - /* Read all packed info, allocate memory and fix field structs */ - -my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys) +my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile) +{ + share->options|= HA_OPTION_READ_ONLY_DATA; + if (_ma_read_pack_info(share, dfile, + (pbool) + test(!(share->options & + (HA_OPTION_PACK_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD))))) + return 1; + return 0; +} + +my_bool _ma_once_end_pack_row(MARIA_SHARE *share) +{ + if (share->decode_trees) + { + my_free((gptr) share->decode_trees,MYF(0)); + my_free((gptr) share->decode_tables,MYF(0)); + } + return 0; +} + + +/* Read all packed info, allocate memory and fix field structs */ + +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys) { - File file; int diff_length; uint i,trees,huff_tree_bits,rec_reflength,length; uint16 *decode_table,*tmp_buff; ulong elements,intervall_length; char *disk_cache,*intervall_buff; uchar header[32]; - MARIA_SHARE *share=info->s; MARIA_BIT_BUFF bit_buff; DBUG_ENTER("_ma_read_pack_info"); @@ -144,7 +175,6 @@ my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys) else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS) maria_quick_table_bits=MAX_QUICK_TABLE_BITS; - file=info->dfile; my_errno=0; if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP))) { @@ -206,7 +236,7 @@ my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys) share->rec[i].space_length_bits=get_bits(&bit_buff,5); share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, huff_tree_bits); - share->rec[i].unpack=get_unpack_function(share->rec+i); + share->rec[i].unpack= get_unpack_function(share->rec+i); } skip_to_next_byte(&bit_buff); decode_table=share->decode_tables; @@ -257,7 +287,8 @@ err0: /* Read on huff-code-table from datafile */ -static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, MARIA_DECODE_TREE *decode_tree, +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, uint16 **decode_table, byte **intervall_buff, uint16 *tmp_buff) { @@ -432,7 +463,7 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end) HA_ERR_WRONG_IN_RECORD or -1 on error */ -int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, byte *buf) +int _ma_read_pack_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) { MARIA_BLOCK_INFO block_info; File file; @@ -466,15 +497,20 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, MARIA_SHARE *share=info->s; DBUG_ENTER("_ma_pack_rec_unpack"); - init_bit_buffer(&info->bit_buff, (uchar*) from,reclength); - + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + to+= info->s->base.null_bytes; + from+= info->s->base.null_bytes; + reclength-= info->s->base.null_bytes; + } + init_bit_buffer(&info->bit_buff, (uchar*) from, reclength); for (current_field=share->rec, end=current_field+share->base.fields ; current_field < end ; current_field++,to=end_field) { end_field=to+current_field->length; - (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to, - (uchar*) end_field); + (*current_field->unpack)(current_field,&info->bit_buff, to, end_field); } if (! info->bit_buff.error && info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end) @@ -487,7 +523,7 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, /* Return function to unpack field */ static void (*get_unpack_function(MARIA_COLUMNDEF *rec)) -(MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, uchar *, uchar *) + (MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, byte *, byte *) { switch (rec->base_type) { case FIELD_SKIP_ZERO: @@ -541,8 +577,9 @@ static void (*get_unpack_function(MARIA_COLUMNDEF *rec)) /* The different functions to unpack a field */ -static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { if (get_bit(bit_buff)) bzero((char*) to,(uint) (end-to)); @@ -554,8 +591,8 @@ static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff } } -static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { if (get_bit(bit_buff)) bzero((char*) to,(uint) (end-to)); @@ -563,8 +600,8 @@ static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar * decode_bytes(rec,bit_buff,to,end); } -static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { if (get_bit(bit_buff)) bfill((byte*) to,(end-to),' '); @@ -572,8 +609,9 @@ static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, ucha decode_bytes(rec,bit_buff,to,end); } -static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -596,8 +634,9 @@ static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit } } -static void uf_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) +static void uf_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -615,8 +654,8 @@ static void uf_endspace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, decode_bytes(rec,bit_buff,to,end); } -static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -634,8 +673,8 @@ static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uc } } -static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) @@ -648,8 +687,9 @@ static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *t bfill((byte*) end-spaces,spaces,' '); } -static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -673,8 +713,9 @@ static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit } -static void uf_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) +static void uf_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -693,8 +734,8 @@ static void uf_prespace_selected(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, } -static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if (get_bit(bit_buff)) @@ -712,8 +753,8 @@ static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uc } } -static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { uint spaces; if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) @@ -726,24 +767,24 @@ static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *t decode_bytes(rec,bit_buff,to+spaces,end); } -static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { end-=rec->space_length_bits; - decode_bytes(rec,bit_buff,(uchar*) to,end); + decode_bytes(rec,bit_buff, to, end); bzero((char*) end,rec->space_length_bits); } static void uf_constant(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff __attribute__((unused)), - uchar *to, - uchar *end) + byte *to, byte *end) { memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to)); } -static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, + byte *end) { reg1 uint field_length=(uint) (end-to); memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff, @@ -755,16 +796,16 @@ static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar * /*ARGSUSED*/ static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)), MARIA_BIT_BUFF *bit_buff __attribute__((unused)), - uchar *to, uchar *end) + byte *to, byte *end) { - bzero((char*) to,(uint) (end-to)); + bzero(to, (uint) (end-to)); } static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end) + byte *to, byte *end) { if (get_bit(bit_buff)) - bzero((byte*) to,(end-to)); + bzero(to, (uint) (end-to)); else { ulong length=get_bits(bit_buff,rec->space_length_bits); @@ -775,7 +816,8 @@ static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, bzero((byte*) to,(end-to)); return; } - decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length); + decode_bytes(rec,bit_buff,(byte*) bit_buff->blob_pos, + (byte*) bit_buff->blob_pos+length); _ma_store_blob_length((byte*) to,pack_length,length); memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, sizeof(char*)); @@ -785,21 +827,21 @@ static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end __attribute__((unused))) + byte *to, byte *end __attribute__((unused))) { if (get_bit(bit_buff)) to[0]= 0; /* Zero lengths */ else { ulong length=get_bits(bit_buff,rec->space_length_bits); - *to= (uchar) length; + *to= (char) length; decode_bytes(rec,bit_buff,to+1,to+1+length); } } static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, - uchar *to, uchar *end __attribute__((unused))) + byte *to, byte *end __attribute__((unused))) { if (get_bit(bit_buff)) to[0]=to[1]=0; /* Zero lengths */ @@ -815,8 +857,8 @@ static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, #if BITS_SAVED == 64 -static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,uchar *to, - uchar *end) +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { reg1 uint bits,low_byte; reg3 uint16 *pos; @@ -850,7 +892,7 @@ static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,uchar *to low_byte=decode_tree->table[low_byte]; if (low_byte & IS_CHAR) { - *to++ = (low_byte & 255); /* Found char in quick table */ + *to++ = (char) (low_byte & 255); /* Found char in quick table */ bits-= ((low_byte >> 8) & 31); /* Remove bits used */ } else @@ -870,7 +912,7 @@ static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,uchar *to decode_bytes_test_bit(7); bits-=8; } - *to++ = *pos; + *to++ = (char) *pos; } } while (to != end); @@ -880,8 +922,8 @@ static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,uchar *to #else -static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar *to, - uchar *end) +static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) { reg1 uint bits,low_byte; reg3 uint16 *pos; @@ -967,7 +1009,7 @@ static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar * decode_bytes_test_bit(7); bits-=8; } - *to++ = (uchar) *pos; + *to++ = (char) *pos; } } while (to != end); @@ -977,7 +1019,8 @@ static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, uchar * #endif /* BIT_SAVED == 64 */ -static uint decode_pos(MARIA_BIT_BUFF *bit_buff, MARIA_DECODE_TREE *decode_tree) +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree) { uint16 *pos=decode_tree->table; for (;;) @@ -991,8 +1034,9 @@ static uint decode_pos(MARIA_BIT_BUFF *bit_buff, MARIA_DECODE_TREE *decode_tree) } -int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, - register my_off_t filepos, +int _ma_read_rnd_pack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, my_bool skip_deleted_blocks) { uint b_type; @@ -1039,9 +1083,9 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, MYF(MY_NABP))) goto err; } - info->packed_length=block_info.rec_len; - info->lastpos=filepos; - info->nextpos=block_info.filepos+block_info.rec_len; + info->packed_length= block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= block_info.filepos+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; DBUG_RETURN (_ma_pack_rec_unpack(info,buf,info->rec_buff, @@ -1053,10 +1097,10 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, byte *buf, /* Read and process header from a huff-record-file */ -uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, - my_off_t filepos) +uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, + File file, my_off_t filepos) { - uchar *header=info->header; + uchar *header= info->header; uint head_length,ref_length; LINT_INIT(ref_length); @@ -1078,8 +1122,9 @@ uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, { head_length+= read_pack_length((uint) maria->s->pack.version, header + head_length, &info->blob_len); - if (!(_ma_alloc_rec_buff(maria,info->rec_len + info->blob_len, - &maria->rec_buff))) + if (_ma_alloc_buffer(&maria->rec_buff, &maria->rec_buff_size, + info->rec_len + info->blob_len + + maria->s->base.extra_rec_buff_size)) return BLOCK_FATAL_ERROR; /* not enough memory */ maria->bit_buff.blob_pos=(uchar*) maria->rec_buff+info->rec_len; maria->bit_buff.blob_end= maria->bit_buff.blob_pos+info->blob_len; @@ -1098,7 +1143,8 @@ uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BLOCK_INFO *info, File file, /* rutines for bit buffer */ /* Note buffer must be 6 byte bigger than longest row */ -static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer, uint length) +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer, + uint length) { bit_buff->pos=buffer; bit_buff->end=buffer+length; @@ -1174,8 +1220,10 @@ static uint max_bit(register uint value) #ifdef HAVE_MMAP -static int _ma_read_mempack_record(MARIA_HA *info,my_off_t filepos,byte *buf); -static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *,my_off_t, my_bool); +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *, MARIA_RECORD_POS, + my_bool); my_bool _ma_memmap_file(MARIA_HA *info) { @@ -1195,7 +1243,7 @@ my_bool _ma_memmap_file(MARIA_HA *info) } info->opt_flag|= MEMMAP_USED; info->read_record= share->read_record= _ma_read_mempack_record; - share->read_rnd= _ma_read_rnd_mempack_record; + share->scan= _ma_read_rnd_mempack_record; DBUG_RETURN(1); } @@ -1207,7 +1255,8 @@ void _ma_unmap_file(MARIA_HA *info) } -static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BLOCK_INFO *info, uchar *header) { header+= read_pack_length((uint) maria->s->pack.version, header, @@ -1217,8 +1266,8 @@ static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, header+= read_pack_length((uint) maria->s->pack.version, header, &info->blob_len); /* _ma_alloc_rec_buff sets my_errno on error */ - if (!(_ma_alloc_rec_buff(maria, info->blob_len, - &maria->rec_buff))) + if (_ma_alloc_buffer(&maria->rec_buff, &maria->rec_buff_size, + info->blob_len + maria->s->base.extra_rec_buff_size)) return 0; /* not enough memory */ maria->bit_buff.blob_pos=(uchar*) maria->rec_buff; maria->bit_buff.blob_end= (uchar*) maria->rec_buff + info->blob_len; @@ -1227,7 +1276,8 @@ static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,MARIA_BLOCK_INFO *info, } -static int _ma_read_mempack_record(MARIA_HA *info, my_off_t filepos, byte *buf) +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) { MARIA_BLOCK_INFO block_info; MARIA_SHARE *share=info->s; @@ -1246,8 +1296,9 @@ static int _ma_read_mempack_record(MARIA_HA *info, my_off_t filepos, byte *buf) /*ARGSUSED*/ -static int _ma_read_rnd_mempack_record(MARIA_HA *info, byte *buf, - register my_off_t filepos, +static int _ma_read_rnd_mempack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, my_bool skip_deleted_blocks __attribute__((unused))) { @@ -1274,8 +1325,8 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info, byte *buf, } #endif info->packed_length=block_info.rec_len; - info->lastpos=filepos; - info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+(uint) (pos-start)+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; DBUG_RETURN (_ma_pack_rec_unpack(info,buf,pos, block_info.rec_len)); diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index 054b8e16468..e7ca329c3e2 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -20,22 +20,20 @@ /* Fetch a key-page in memory */ -uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, int level, - uchar *buff, int return_buffer) +byte *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, + byte *buff, int return_buffer) { - uchar *tmp; + byte *tmp; uint page_size; DBUG_ENTER("_ma_fetch_keypage"); DBUG_PRINT("enter",("page: %ld",page)); - tmp=(uchar*) key_cache_read(info->s->key_cache, - info->s->kfile, page, level, (byte*) buff, - (uint) keyinfo->block_length, - (uint) keyinfo->block_length, - return_buffer); + tmp= key_cache_read(info->s->key_cache, info->s->kfile, page, level, buff, + info->s->block_size, info->s->block_size, + return_buffer); if (tmp == info->buff) - info->buff_used=1; + info->keybuff_used=1; else if (!tmp) { DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno)); @@ -53,8 +51,8 @@ uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; maria_print_error(info->s, HA_ERR_CRASHED); - my_errno = HA_ERR_CRASHED; - tmp = 0; + my_errno= HA_ERR_CRASHED; + tmp= 0; } DBUG_RETURN(tmp); } /* _ma_fetch_keypage */ @@ -63,7 +61,7 @@ uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Write a key-page on disk */ int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - my_off_t page, int level, uchar *buff) + my_off_t page, int level, byte *buff) { reg3 uint length; DBUG_ENTER("_ma_write_keypage"); @@ -112,8 +110,8 @@ int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, DBUG_ENTER("_ma_dispose"); DBUG_PRINT("enter",("pos: %ld", (long) pos)); - old_link= info->s->state.key_del[keyinfo->block_size_index]; - info->s->state.key_del[keyinfo->block_size_index]= pos; + old_link= info->s->state.key_del; + info->s->state.key_del= pos; mi_sizestore(buff,old_link); info->s->state.changed|= STATE_NOT_SORTED_PAGES; DBUG_RETURN(key_cache_write(info->s->key_cache, @@ -129,11 +127,10 @@ int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) { my_off_t pos; - char buff[8]; + byte buff[8]; DBUG_ENTER("_ma_new"); - if ((pos= info->s->state.key_del[keyinfo->block_size_index]) == - HA_OFFSET_ERROR) + if ((pos= info->s->state.key_del) == HA_OFFSET_ERROR) { if (info->state->key_file_length >= info->s->base.max_key_file_length - keyinfo->block_length) @@ -153,7 +150,7 @@ my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) (uint) keyinfo->block_length,0)) pos= HA_OFFSET_ERROR; else - info->s->state.key_del[keyinfo->block_size_index]= mi_sizekorr(buff); + info->s->state.key_del= mi_sizekorr(buff); } info->s->state.changed|= STATE_NOT_SORTED_PAGES; DBUG_PRINT("exit",("Pos: %ld",(long) pos)); diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c index 0f6883f4c9d..243e8e1b9c3 100644 --- a/storage/maria/ma_range.c +++ b/storage/maria/ma_range.c @@ -24,10 +24,10 @@ static ha_rows _ma_record_pos(MARIA_HA *info,const byte *key,uint key_len, enum ha_rkey_function search_flag); -static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key, - uint key_len,uint nextflag,my_off_t pos); -static uint _ma_keynr(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *page, - uchar *keypos,uint *ret_max_key); +static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo, byte *key, + uint key_len,uint nextflag, my_off_t pos); +static uint _ma_keynr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *keypos, uint *ret_max_key); /* @@ -68,12 +68,12 @@ ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key, #ifdef HAVE_RTREE_KEYS case HA_KEY_ALG_RTREE: { - uchar * key_buff; + byte *key_buff; uint start_key_len; key_buff= info->lastkey+info->s->base.max_key_length; start_key_len= _ma_pack_key(info,inx, key_buff, - (uchar*) min_key->key, min_key->length, + min_key->key, min_key->length, (HA_KEYSEG**) 0); res= maria_rtree_estimate(info, inx, key_buff, start_key_len, maria_read_vec[min_key->flag]); @@ -113,24 +113,23 @@ static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len, { uint inx=(uint) info->lastinx, nextflag; MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx; - uchar *key_buff; + byte *key_buff; double pos; - DBUG_ENTER("_ma_record_pos"); DBUG_PRINT("enter",("search_flag: %d",search_flag)); if (key_len == 0) - key_len=USE_WHOLE_KEY; + key_len= USE_WHOLE_KEY; key_buff=info->lastkey+info->s->base.max_key_length; - key_len= _ma_pack_key(info,inx,key_buff,(uchar*) key,key_len, + key_len= _ma_pack_key(info, inx, key_buff, key, key_len, (HA_KEYSEG**) 0); - DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg, - (uchar*) key_buff,key_len);); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, key_len);); nextflag=maria_read_vec[search_flag]; if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) key_len=USE_WHOLE_KEY; - pos= _ma_search_pos(info,keyinfo,key_buff,key_len, + pos= _ma_search_pos(info,keyinfo, key_buff, key_len, nextflag | SEARCH_SAVE_BUFF, info->s->state.key_root[inx]); if (pos >= 0.0) @@ -147,13 +146,13 @@ static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len, static double _ma_search_pos(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *key, uint key_len, uint nextflag, + byte *key, uint key_len, uint nextflag, register my_off_t pos) { int flag; uint nod_flag,keynr,max_keynr; my_bool after_key; - uchar *keypos,*buff; + byte *keypos, *buff; double offset; DBUG_ENTER("_ma_search_pos"); @@ -162,7 +161,7 @@ static double _ma_search_pos(register MARIA_HA *info, if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1))) goto err; - flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, + flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag, &keypos,info->lastkey, &after_key); nod_flag=_ma_test_if_nod(buff); keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr); @@ -213,11 +212,11 @@ err: /* Get keynummer of current key and max number of keys in nod */ -static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, - uchar *keypos, uint *ret_max_key) +static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *keypos, uint *ret_max_key) { uint nod_flag,keynr,max_key; - uchar t_buff[HA_MAX_KEY_BUFF],*end; + byte t_buff[HA_MAX_KEY_BUFF],*end; end= page+maria_getint(page); nod_flag=_ma_test_if_nod(page); diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c index 503e8989936..6fa8af75c40 100644 --- a/storage/maria/ma_rfirst.c +++ b/storage/maria/ma_rfirst.c @@ -21,7 +21,7 @@ int maria_rfirst(MARIA_HA *info, byte *buf, int inx) { DBUG_ENTER("maria_rfirst"); - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; info->update|= HA_STATE_PREV_FOUND; DBUG_RETURN(maria_rnext(info,buf,inx)); } /* maria_rfirst */ diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index 2cb54a73b15..bd92ebd4e4c 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -22,16 +22,16 @@ /* Read a record using key */ /* Ordinary search_flag is 0 ; Give error if no record with key */ -int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len, - enum ha_rkey_function search_flag) +int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, + uint key_len, enum ha_rkey_function search_flag) { - uchar *key_buff; + byte *key_buff; MARIA_SHARE *share=info->s; MARIA_KEYDEF *keyinfo; HA_KEYSEG *last_used_keyseg; uint pack_key_length, use_key_length, nextflag; DBUG_ENTER("maria_rkey"); - DBUG_PRINT("enter", ("base: %lx buf: %lx inx: %d search_flag: %d", + DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d", (long) info, (long) buf, inx, search_flag)); if ((inx = _ma_check_index(info,inx)) < 0) @@ -47,7 +47,7 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len /* key is already packed!; This happens when we are using a MERGE TABLE */ - key_buff=info->lastkey+info->s->base.max_key_length; + key_buff= info->lastkey+info->s->base.max_key_length; pack_key_length= key_len; bmove(key_buff,key,key_len); last_used_keyseg= 0; @@ -58,7 +58,7 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len key_len=USE_WHOLE_KEY; /* Save the packed key for later use in the second buffer of lastkey. */ key_buff=info->lastkey+info->s->base.max_key_length; - pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, (uchar*) key, + pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, key, key_len, &last_used_keyseg); /* Save packed_key_length for use by the MERGE engine. */ info->pack_key_length= pack_key_length; @@ -82,15 +82,17 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) { maria_print_error(info->s, HA_ERR_CRASHED); - my_errno=HA_ERR_CRASHED; - goto err; + my_errno= HA_ERR_CRASHED; + info->cur_row.lastpos= HA_OFFSET_ERROR; } break; #endif case HA_KEY_ALG_BTREE: default: if (!_ma_search(info, keyinfo, key_buff, use_key_length, - maria_read_vec[search_flag], info->s->state.key_root[inx])) + maria_read_vec[search_flag], + info->s->state.key_root[inx]) && + share->concurrent_insert) { /* If we are searching for an exact key (including the data pointer) @@ -98,50 +100,65 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len then the result is "key not found". */ if ((search_flag == HA_READ_KEY_EXACT) && - (info->lastpos >= info->state->data_file_length)) + (info->cur_row.lastpos >= info->state->data_file_length)) { my_errno= HA_ERR_KEY_NOT_FOUND; - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; } - else while (info->lastpos >= info->state->data_file_length) + else { - /* - Skip rows that are inserted by other threads since we got a lock - Note that this can only happen if we are not searching after an - exact key, because the keys are sorted according to position - */ + while (info->cur_row.lastpos >= info->state->data_file_length) + { + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + exact key, because the keys are sorted according to position + */ - if (_ma_search_next(info, keyinfo, info->lastkey, - info->lastkey_length, - maria_readnext_vec[search_flag], - info->s->state.key_root[inx])) - break; + if (_ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) + { + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + } } } } if (share->concurrent_insert) rw_unlock(&share->key_root_lock[inx]); + if (info->cur_row.lastpos == HA_OFFSET_ERROR) + { + fast_ma_writeinfo(info); + goto err; + } + /* Calculate length of the found key; Used by maria_rnext_same */ - if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg && - info->lastpos != HA_OFFSET_ERROR) + if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg) info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey, last_used_keyseg); else info->last_rkey_length= pack_key_length; + /* Check if we don't want to have record back, only error message */ if (!buf) - DBUG_RETURN(info->lastpos == HA_OFFSET_ERROR ? my_errno : 0); - - if (!(*info->read_record)(info,info->lastpos,buf)) + { + fast_ma_writeinfo(info); + DBUG_RETURN(0); + } + if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) { info->update|= HA_STATE_AKTIV; /* Record is read */ DBUG_RETURN(0); } - info->lastpos = HA_OFFSET_ERROR; /* Didn't find key */ + info->cur_row.lastpos= HA_OFFSET_ERROR; /* Didn't find row */ +err: /* Store last used key as a base for read next */ memcpy(info->lastkey,key_buff,pack_key_length); info->last_rkey_length= pack_key_length; @@ -150,6 +167,5 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, uint key_len if (search_flag == HA_READ_AFTER_KEY) info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */ -err: DBUG_RETURN(my_errno); } /* _ma_rkey */ diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c index 8ce26afa78d..504cc89aed3 100644 --- a/storage/maria/ma_rlast.c +++ b/storage/maria/ma_rlast.c @@ -21,7 +21,7 @@ int maria_rlast(MARIA_HA *info, byte *buf, int inx) { DBUG_ENTER("maria_rlast"); - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; info->update|= HA_STATE_NEXT_FOUND; DBUG_RETURN(maria_rprev(info,buf,inx)); } /* maria_rlast */ diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c index 8f342c6a8d2..c7feded933e 100644 --- a/storage/maria/ma_rnext.c +++ b/storage/maria/ma_rnext.c @@ -34,7 +34,8 @@ int maria_rnext(MARIA_HA *info, byte *buf, int inx) if ((inx = _ma_check_index(info,inx)) < 0) DBUG_RETURN(my_errno); flag=SEARCH_BIGGER; /* Read next */ - if (info->lastpos == HA_OFFSET_ERROR && info->update & HA_STATE_PREV_FOUND) + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_PREV_FOUND) flag=0; /* Read first */ if (fast_ma_readinfo(info)) @@ -86,7 +87,7 @@ int maria_rnext(MARIA_HA *info, byte *buf, int inx) { if (!error) { - while (info->lastpos >= info->state->data_file_length) + while (info->cur_row.lastpos >= info->state->data_file_length) { /* Skip rows inserted by other threads since we got a lock */ if ((error= _ma_search_next(info,info->s->keyinfo+inx, @@ -110,9 +111,9 @@ int maria_rnext(MARIA_HA *info, byte *buf, int inx) } else if (!buf) { - DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } - else if (!(*info->read_record)(info,info->lastpos,buf)) + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) { info->update|= HA_STATE_AKTIV; /* Record is read */ DBUG_RETURN(0); diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c index b53639073e3..a5ce0cfe15c 100644 --- a/storage/maria/ma_rnext_same.c +++ b/storage/maria/ma_rnext_same.c @@ -17,13 +17,14 @@ #include "maria_def.h" #include "ma_rt_index.h" - /* - Read next row with the same key as previous read, but abort if - the key changes. - One may have done a write, update or delete of the previous row. - NOTE! Even if one changes the previous row, the next read is done - based on the position of the last used key! - */ +/* + Read next row with the same key as previous read, but abort if + the key changes. + One may have done a write, update or delete of the previous row. + + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! +*/ int maria_rnext_same(MARIA_HA *info, byte *buf) { @@ -32,9 +33,10 @@ int maria_rnext_same(MARIA_HA *info, byte *buf) MARIA_KEYDEF *keyinfo; DBUG_ENTER("maria_rnext_same"); - if ((int) (inx=info->lastinx) < 0 || info->lastpos == HA_OFFSET_ERROR) + if ((int) (inx= info->lastinx) < 0 || + info->cur_row.lastpos == HA_OFFSET_ERROR) DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); - keyinfo=info->s->keyinfo+inx; + keyinfo= info->s->keyinfo+inx; if (fast_ma_readinfo(info)) DBUG_RETURN(my_errno); @@ -50,7 +52,7 @@ int maria_rnext_same(MARIA_HA *info, byte *buf) { error=1; my_errno=HA_ERR_END_OF_FILE; - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; break; } break; @@ -68,16 +70,17 @@ int maria_rnext_same(MARIA_HA *info, byte *buf) info->lastkey_length,SEARCH_BIGGER, info->s->state.key_root[inx]))) break; - if (ha_key_cmp(keyinfo->seg, info->lastkey, info->lastkey2, + if (ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, + (uchar*) info->lastkey2, info->last_rkey_length, SEARCH_FIND, not_used)) { error=1; my_errno=HA_ERR_END_OF_FILE; - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; break; } /* Skip rows that are inserted by other threads since we got a lock */ - if (info->lastpos < info->state->data_file_length) + if (info->cur_row.lastpos < info->state->data_file_length) break; } } @@ -94,9 +97,9 @@ int maria_rnext_same(MARIA_HA *info, byte *buf) } else if (!buf) { - DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } - else if (!(*info->read_record)(info,info->lastpos,buf)) + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) { info->update|= HA_STATE_AKTIV; /* Record is read */ DBUG_RETURN(0); diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c index 8dd4498cf8b..ea562359ded 100644 --- a/storage/maria/ma_rprev.c +++ b/storage/maria/ma_rprev.c @@ -33,7 +33,8 @@ int maria_rprev(MARIA_HA *info, byte *buf, int inx) if ((inx = _ma_check_index(info,inx)) < 0) DBUG_RETURN(my_errno); flag=SEARCH_SMALLER; /* Read previous */ - if (info->lastpos == HA_OFFSET_ERROR && info->update & HA_STATE_NEXT_FOUND) + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_NEXT_FOUND) flag=0; /* Read last */ if (fast_ma_readinfo(info)) @@ -56,7 +57,7 @@ int maria_rprev(MARIA_HA *info, byte *buf, int inx) { if (!error) { - while (info->lastpos >= info->state->data_file_length) + while (info->cur_row.lastpos >= info->state->data_file_length) { /* Skip rows that are inserted by other threads since we got a lock */ if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, @@ -77,9 +78,9 @@ int maria_rprev(MARIA_HA *info, byte *buf, int inx) } else if (!buf) { - DBUG_RETURN(info->lastpos==HA_OFFSET_ERROR ? my_errno : 0); + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); } - else if (!(*info->read_record)(info,info->lastpos,buf)) + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) { info->update|= HA_STATE_AKTIV; /* Record is read */ DBUG_RETURN(0); diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c index 2f01c0e92c5..33940d5f23f 100644 --- a/storage/maria/ma_rrnd.c +++ b/storage/maria/ma_rrnd.c @@ -21,40 +21,34 @@ #include "maria_def.h" /* - Read a row based on position. - If filepos= HA_OFFSET_ERROR then read next row - Return values - Returns one of following values: - 0 = Ok. - HA_ERR_RECORD_DELETED = Record is deleted. - HA_ERR_END_OF_FILE = EOF. + Read a row based on position. + + RETURN + 0 Ok. + HA_ERR_RECORD_DELETED Record is deleted. + HA_ERR_END_OF_FILE EOF. */ -int maria_rrnd(MARIA_HA *info, byte *buf, register my_off_t filepos) +int maria_rrnd(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) { - my_bool skip_deleted_blocks; DBUG_ENTER("maria_rrnd"); - skip_deleted_blocks=0; - + DBUG_ASSERT(filepos != HA_OFFSET_ERROR); +#ifdef NOT_USED if (filepos == HA_OFFSET_ERROR) { skip_deleted_blocks=1; - if (info->lastpos == HA_OFFSET_ERROR) /* First read ? */ - filepos= info->s->pack.header_length; /* Read first record */ + if (info->cur_row.lastpos == HA_OFFSET_ERROR) /* First read ? */ + filepos= info->s->pack.header_length; /* Read first record */ else - filepos= info->nextpos; + filepos= info->cur_row.nextpos; } +#endif - if (info->once_flags & RRND_PRESERVE_LASTINX) - info->once_flags&= ~RRND_PRESERVE_LASTINX; - else - info->lastinx= -1; /* Can't forward or backward */ /* Init all but update-flag */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) DBUG_RETURN(my_errno); - DBUG_RETURN ((*info->s->read_rnd)(info,buf,filepos,skip_deleted_blocks)); + DBUG_RETURN((*info->s->read_record)(info, buf, filepos)); } diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c index 913ae3b4370..7556c1e7332 100644 --- a/storage/maria/ma_rsame.c +++ b/storage/maria/ma_rsame.c @@ -16,14 +16,17 @@ #include "maria_def.h" - /* - ** Find current row with read on position or read on key - ** If inx >= 0 find record using key - ** Return values: - ** 0 = Ok. - ** HA_ERR_KEY_NOT_FOUND = Row is deleted - ** HA_ERR_END_OF_FILE = End of file - */ +/* + Find current row with read on position or read on key + + NOTES + If inx >= 0 find record using key + + RETURN + 0 Ok + HA_ERR_KEY_NOT_FOUND Row is deleted + HA_ERR_END_OF_FILE End of file +*/ int maria_rsame(MARIA_HA *info, byte *record, int inx) @@ -34,7 +37,8 @@ int maria_rsame(MARIA_HA *info, byte *record, int inx) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } - if (info->lastpos == HA_OFFSET_ERROR || info->update & HA_STATE_DELETED) + if (info->cur_row.lastpos == HA_OFFSET_ERROR || + info->update & HA_STATE_DELETED) { DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */ } @@ -48,7 +52,7 @@ int maria_rsame(MARIA_HA *info, byte *record, int inx) { info->lastinx=inx; info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, - info->lastpos); + info->cur_row.lastpos); if (info->s->concurrent_insert) rw_rdlock(&info->s->key_root_lock[inx]); VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY, @@ -58,7 +62,7 @@ int maria_rsame(MARIA_HA *info, byte *record, int inx) rw_unlock(&info->s->key_root_lock[inx]); } - if (!(*info->read_record)(info,info->lastpos,record)) + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) DBUG_RETURN(0); if (my_errno == HA_ERR_RECORD_DELETED) my_errno=HA_ERR_KEY_NOT_FOUND; diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c index 09861c03c32..859f0da9b51 100644 --- a/storage/maria/ma_rsamepos.c +++ b/storage/maria/ma_rsamepos.c @@ -28,29 +28,31 @@ ** HA_ERR_END_OF_FILE = End of file */ -int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, my_off_t filepos) +int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, + MARIA_RECORD_POS filepos) { DBUG_ENTER("maria_rsame_with_pos"); DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); - if (inx < -1 || (inx >= 0 && !maria_is_key_active(info->s->state.key_map, inx))) + if (inx < -1 || + (inx >= 0 && !maria_is_key_active(info->s->state.key_map, inx))) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - if ((*info->s->read_rnd)(info,record,filepos,0)) + if ((*info->s->read_record)(info, record, filepos)) { if (my_errno == HA_ERR_RECORD_DELETED) my_errno=HA_ERR_KEY_NOT_FOUND; DBUG_RETURN(my_errno); } - info->lastpos=filepos; - info->lastinx=inx; + info->cur_row.lastpos= filepos; + info->lastinx= inx; if (inx >= 0) { info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, - info->lastpos); + info->cur_row.lastpos); info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */ } DBUG_RETURN(0); diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c index 83ced5b8167..8e8ec6c991b 100644 --- a/storage/maria/ma_rt_index.c +++ b/storage/maria/ma_rt_index.c @@ -53,18 +53,17 @@ typedef struct st_page_list 1 Not found */ -static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint search_flag, - uint nod_cmp_flag, my_off_t page, int level) +static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uint search_flag, + uint nod_cmp_flag, my_off_t page, int level) { - uchar *k; - uchar *last; uint nod_flag; int res; - uchar *page_buf; + byte *page_buf, *k, *last; int k_len; uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) { my_errno = HA_ERR_OUT_OF_MEM; return -1; @@ -77,24 +76,27 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint sear if(info->maria_rtree_recursion_depth >= level) { - k = page_buf + *saved_key; + k= page_buf + *saved_key; } else { k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); } - last = rt_PAGE_END(page_buf); + last= rt_PAGE_END(page_buf); for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) { if (nod_flag) { /* this is an internal node in the tree */ - if (!(res = maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, - info->last_rkey_length, nod_cmp_flag))) + if (!(res = maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, k, + info->last_rkey_length, nod_cmp_flag))) { - switch ((res = maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, - _ma_kpos(nod_flag, k), level + 1))) + switch ((res = maria_rtree_find_req(info, keyinfo, search_flag, + nod_cmp_flag, + _ma_kpos(nod_flag, k), + level + 1))) { case 0: /* found - exit from recursion */ *saved_key = k - page_buf; @@ -111,11 +113,11 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint sear else { /* this is a leaf */ - if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, k, - info->last_rkey_length, search_flag)) + if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, + k, info->last_rkey_length, search_flag)) { - uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); - info->lastpos = _ma_dpos(info, 0, after_key); + byte *after_key = (byte*) rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); info->lastkey_length = k_len + info->s->base.rec_reflength; memcpy(info->lastkey, k, info->lastkey_length); info->maria_rtree_recursion_depth = level; @@ -126,11 +128,11 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint sear info->int_keypos = info->buff; info->int_maxpos = info->buff + (last - after_key); memcpy(info->buff, after_key, last - after_key); - info->buff_used = 0; + info->keybuff_used = 0; } else { - info->buff_used = 1; + info->keybuff_used = 1; } res = 0; @@ -138,7 +140,7 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint sear } } } - info->lastpos = HA_OFFSET_ERROR; + info->cur_row.lastpos = HA_OFFSET_ERROR; my_errno = HA_ERR_KEY_NOT_FOUND; res = 1; @@ -148,7 +150,7 @@ ok: err1: my_afree((byte*)page_buf); - info->lastpos = HA_OFFSET_ERROR; + info->cur_row.lastpos = HA_OFFSET_ERROR; return -1; } @@ -170,8 +172,8 @@ err1: 1 Not found */ -int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_length, - uint search_flag) +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag) { my_off_t root; uint nod_cmp_flag; @@ -191,11 +193,12 @@ int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_leng info->last_rkey_length = key_length; info->maria_rtree_recursion_depth = -1; - info->buff_used = 1; + info->keybuff_used = 1; - nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? - MBR_WITHIN : MBR_INTERSECT); - return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); + nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, + 0); } @@ -221,27 +224,29 @@ int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag) MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; if (info->update & HA_STATE_DELETED) - return maria_rtree_find_first(info, keynr, info->lastkey, info->lastkey_length, - search_flag); + return maria_rtree_find_first(info, keynr, info->lastkey, + info->lastkey_length, + search_flag); - if (!info->buff_used) + if (!info->keybuff_used) { - uchar *key= info->int_keypos; + byte *key= info->int_keypos; while (key < info->int_maxpos) { - if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, key, - info->last_rkey_length, search_flag)) + if (!maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, key, + info->last_rkey_length, search_flag)) { - uchar *after_key = key + keyinfo->keylength; + byte *after_key= key + keyinfo->keylength; - info->lastpos= _ma_dpos(info, 0, after_key); + info->cur_row.lastpos= _ma_dpos(info, 0, after_key); memcpy(info->lastkey, key, info->lastkey_length); if (after_key < info->int_maxpos) info->int_keypos= after_key; else - info->buff_used= 1; + info->keybuff_used= 1; return 0; } key+= keyinfo->keylength; @@ -274,15 +279,12 @@ int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag) static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_length, my_off_t page, int level) { - uchar *k; - uchar *last; - uint nod_flag; + byte *page_buf, *last, *k; + uint nod_flag, k_len; int res; - uchar *page_buf; - uint k_len; - uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; + uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) return -1; if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) goto err1; @@ -312,7 +314,7 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_l { /* this is an internal node in the tree */ switch ((res = maria_rtree_get_req(info, keyinfo, key_length, - _ma_kpos(nod_flag, k), level + 1))) + _ma_kpos(nod_flag, k), level + 1))) { case 0: /* found - exit from recursion */ *saved_key = k - page_buf; @@ -328,8 +330,8 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_l else { /* this is a leaf */ - uchar *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); - info->lastpos = _ma_dpos(info, 0, after_key); + byte *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); info->lastkey_length = k_len + info->s->base.rec_reflength; memcpy(info->lastkey, k, info->lastkey_length); @@ -338,21 +340,21 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_l if (after_key < last) { - info->int_keypos = (uchar*)saved_key; + info->int_keypos = (byte*) saved_key; memcpy(info->buff, page_buf, keyinfo->block_length); info->int_maxpos = rt_PAGE_END(info->buff); - info->buff_used = 0; + info->keybuff_used = 0; } else { - info->buff_used = 1; + info->keybuff_used = 1; } res = 0; goto ok; } } - info->lastpos = HA_OFFSET_ERROR; + info->cur_row.lastpos = HA_OFFSET_ERROR; my_errno = HA_ERR_KEY_NOT_FOUND; res = 1; @@ -362,7 +364,7 @@ ok: err1: my_afree((byte*)page_buf); - info->lastpos = HA_OFFSET_ERROR; + info->cur_row.lastpos = HA_OFFSET_ERROR; return -1; } @@ -388,7 +390,7 @@ int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length) } info->maria_rtree_recursion_depth = -1; - info->buff_used = 1; + info->keybuff_used = 1; return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); } @@ -408,23 +410,23 @@ int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length) my_off_t root; MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; - if (!info->buff_used) + if (!info->keybuff_used) { uint k_len = keyinfo->keylength - info->s->base.rec_reflength; /* rt_PAGE_NEXT_KEY(info->int_keypos) */ - uchar *key = info->buff + *(int*)info->int_keypos + k_len + + byte *key = info->buff + *(int*)info->int_keypos + k_len + info->s->base.rec_reflength; /* rt_PAGE_NEXT_KEY(key) */ - uchar *after_key = key + k_len + info->s->base.rec_reflength; + byte *after_key = key + k_len + info->s->base.rec_reflength; - info->lastpos = _ma_dpos(info, 0, after_key); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); info->lastkey_length = k_len + info->s->base.rec_reflength; memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength); *(int*)info->int_keypos = key - info->buff; if (after_key >= info->int_maxpos) { - info->buff_used = 1; + info->keybuff_used = 1; } return 0; @@ -447,8 +449,10 @@ int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length) */ #ifdef PICK_BY_PERIMETER -static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, uchar *page_buf, uint nod_flag) +static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, + uint key_length, byte *page_buf, + uint nod_flag) { double increase; double best_incr = DBL_MAX; @@ -480,16 +484,18 @@ static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar #endif /*PICK_BY_PERIMETER*/ #ifdef PICK_BY_AREA -static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, uchar *page_buf, uint nod_flag) +static byte *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, byte *page_buf, + uint nod_flag) { double increase; double best_incr = DBL_MAX; double area; double best_area; - uchar *best_key; - uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); - uchar *last = rt_PAGE_END(page_buf); + byte *best_key; + byte *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + byte *last = rt_PAGE_END(page_buf); LINT_INIT(best_area); LINT_INIT(best_key); @@ -498,7 +504,7 @@ static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar { /* The following is safe as -1.0 is an exact number */ if ((increase = maria_rtree_area_increase(keyinfo->seg, k, key, key_length, - &area)) == -1.0) + &area)) == -1.0) return NULL; /* The following should be safe, even if we compare doubles */ if (increase < best_incr) @@ -532,16 +538,17 @@ static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar 1 Child was split */ -static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, my_off_t page, my_off_t *new_page, - int ins_level, int level) +static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + my_off_t *new_page, + int ins_level, int level) { - uchar *k; uint nod_flag; - uchar *page_buf; int res; + byte *page_buf, *k; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length + + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; @@ -555,10 +562,11 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */ { if ((k = maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf, - nod_flag)) == NULL) + nod_flag)) == NULL) goto err1; switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, - _ma_kpos(nod_flag, k), new_page, ins_level, level + 1))) + _ma_kpos(nod_flag, k), new_page, + ins_level, level + 1))) { case 0: /* child was not split */ { @@ -569,14 +577,15 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * } case 1: /* child was split */ { - uchar *new_key = page_buf + keyinfo->block_length + nod_flag; + byte *new_key = page_buf + keyinfo->block_length + nod_flag; /* set proper MBR for key */ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, - _ma_kpos(nod_flag, k))) + _ma_kpos(nod_flag, k))) goto err1; /* add new key for new page */ _ma_kpointer(info, new_key - nod_flag, *new_page); - if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, *new_page)) + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + *new_page)) goto err1; res = maria_rtree_add_key(info, keyinfo, new_key, key_length, page_buf, new_page); @@ -593,18 +602,18 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * } else { - res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf, new_page); + res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf, + new_page); if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) goto err1; - goto ok; } ok: - my_afree((byte*)page_buf); + my_afree(page_buf); return res; err1: - my_afree((byte*)page_buf); + my_afree(page_buf); return -1; } @@ -618,8 +627,8 @@ err1: 1 Root was split */ -static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key, - uint key_length, int ins_level) +static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, byte *key, + uint key_length, int ins_level) { my_off_t old_root; MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; @@ -632,7 +641,7 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key, if ((old_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR) return -1; - info->buff_used = 1; + info->keybuff_used = 1; maria_putint(info->buff, 2, 0); res = maria_rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL); if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff)) @@ -650,13 +659,12 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key, } case 1: /* root was split, grow a new root */ { - uchar *new_root_buf; + byte *new_root_buf, *new_key; my_off_t new_root; - uchar *new_key; uint nod_flag = info->s->base.key_reflength; - if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length + - HA_MAX_KEY_BUFF))) + if (!(new_root_buf= (byte*) my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; return -1; @@ -670,15 +678,19 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key, new_key = new_root_buf + keyinfo->block_length + nod_flag; _ma_kpointer(info, new_key - nod_flag, old_root); - if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, old_root)) + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + old_root)) goto err1; - if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) == -1) goto err1; _ma_kpointer(info, new_key - nod_flag, new_page); - if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, new_page)) + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + new_page)) goto err1; - if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, NULL) + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) == -1) goto err1; if (_ma_write_keypage(info, keyinfo, new_root, @@ -710,10 +722,11 @@ err1: 0 OK */ -int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length) { return (!key_length || - (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ? -1 : 0; + (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ? + -1 : 0; } @@ -756,18 +769,18 @@ err1: 2 Empty leaf */ -static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, my_off_t page, uint *page_size, - stPageList *ReinsertList, int level) +static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + uint *page_size, + stPageList *ReinsertList, int level) { - uchar *k; - uchar *last; ulong i; uint nod_flag; - uchar *page_buf; int res; + byte *page_buf, *last, *k; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) { my_errno = HA_ERR_OUT_OF_MEM; return -1; @@ -779,7 +792,7 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); last = rt_PAGE_END(page_buf); - for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), ++i) + for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), i++) { if (nod_flag) { @@ -792,7 +805,8 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * case 0: /* deleted */ { /* test page filling */ - if (*page_size + key_length >= rt_PAGE_MIN_SIZE(keyinfo->block_length)) + if (*page_size + key_length >= + rt_PAGE_MIN_SIZE(keyinfo->block_length)) { /* OK */ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, @@ -805,7 +819,8 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar * else { /* too small: delete key & add it descendant to reinsert list */ - if (maria_rtree_fill_reinsert_list(ReinsertList, _ma_kpos(nod_flag, k), + if (maria_rtree_fill_reinsert_list(ReinsertList, + _ma_kpos(nod_flag, k), level + 1)) goto err1; maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); @@ -883,7 +898,7 @@ err1: 0 Deleted */ -int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length) { uint page_size; stPageList ReinsertList; @@ -914,12 +929,10 @@ int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length) ulong i; for (i = 0; i < ReinsertList.n_pages; ++i) { - uchar *page_buf; uint nod_flag; - uchar *k; - uchar *last; + byte *page_buf, *k, *last; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) { my_errno = HA_ERR_OUT_OF_MEM; goto err1; @@ -935,11 +948,11 @@ int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length) if (maria_rtree_insert_level(info, keynr, k, key_length, ReinsertList.pages[i].level) == -1) { - my_afree((byte*)page_buf); + my_afree(page_buf); goto err1; } } - my_afree((byte*)page_buf); + my_afree(page_buf); if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs, DFLT_INIT_HITS)) goto err1; @@ -969,16 +982,14 @@ int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length) err1: return -1; } - case 1: /* not found */ + case 1: /* not found */ { my_errno = HA_ERR_KEY_NOT_FOUND; return -1; } default: - case -1: /* error */ - { + case -1: /* error */ return -1; - } } } @@ -990,17 +1001,14 @@ err1: estimated value */ -ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, uint key_length, uint flag) { MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; my_off_t root; uint i = 0; - uchar *k; - uchar *last; - uint nod_flag; - uchar *page_buf; - uint k_len; + uint nod_flag, k_len; + byte *page_buf, *k, *last; double area = 0; ha_rows res = 0; @@ -1009,7 +1017,7 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) return HA_POS_ERROR; - if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) return HA_POS_ERROR; if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0)) goto err1; @@ -1020,7 +1028,7 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); last = rt_PAGE_END(page_buf); - for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), ++i) + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), i++) { if (nod_flag) { @@ -1035,7 +1043,8 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, } else if (flag & (MBR_WITHIN | MBR_EQUAL)) { - if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) area += 1; } else @@ -1045,14 +1054,15 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, { if (flag & (MBR_CONTAIN | MBR_INTERSECT)) { - area += maria_rtree_overlapping_area(keyinfo->seg, key, k, key_length) / - k_area; + area+= maria_rtree_overlapping_area(keyinfo->seg, key, k, + key_length) / k_area; } else if (flag & (MBR_WITHIN | MBR_EQUAL)) { - if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) - area += maria_rtree_rect_volume(keyinfo->seg, key, key_length) / - k_area; + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) + area+= (maria_rtree_rect_volume(keyinfo->seg, key, key_length) / + k_area); } else goto err1; @@ -1076,7 +1086,7 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, return res; err1: - my_afree((byte*)page_buf); + my_afree(page_buf); return HA_POS_ERROR; } diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h index ff431d81372..76b6c6e230c 100644 --- a/storage/maria/ma_rt_index.h +++ b/storage/maria/ma_rt_index.h @@ -27,21 +27,22 @@ #define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 3) -int maria_rtree_insert(MARIA_HA *info, uint keynr, uchar *key, uint key_length); -int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length); +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length); +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length); -int maria_rtree_find_first(MARIA_HA *info, uint keynr, uchar *key, uint key_length, - uint search_flag); +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag); int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag); int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length); int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length); -ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key, - uint key_length, uint flag); +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint flag); -int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint key_length, my_off_t *new_page_offs); +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_length, + my_off_t *new_page_offs); #endif /*HAVE_RTREE_KEYS*/ #endif /* _rt_index_h */ diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c index 2732fefffbe..1453195d263 100644 --- a/storage/maria/ma_rt_key.c +++ b/storage/maria/ma_rt_key.c @@ -30,8 +30,8 @@ 1 Split */ -int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, uchar *page_buf, my_off_t *new_page) +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page) { uint page_size = maria_getint(page_buf); uint nod_flag = _ma_test_if_nod(page_buf); @@ -61,14 +61,16 @@ int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, new_page) ? -1 : 1); } + /* Delete key from the page */ -int maria_rtree_delete_key(MARIA_HA *info, uchar *page_buf, uchar *key, + +int maria_rtree_delete_key(MARIA_HA *info, byte *page_buf, byte *key, uint key_length, uint nod_flag) { uint16 page_size = maria_getint(page_buf); - uchar *key_start; + byte *key_start; key_start= key - nod_flag; if (!nod_flag) @@ -87,7 +89,7 @@ int maria_rtree_delete_key(MARIA_HA *info, uchar *page_buf, uchar *key, Calculate and store key MBR */ -int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, uint key_length, my_off_t child_page) { if (!_ma_fetch_keypage(info, keyinfo, child_page, diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h index 448024ed8c5..f44251782c1 100644 --- a/storage/maria/ma_rt_key.h +++ b/storage/maria/ma_rt_key.h @@ -22,12 +22,12 @@ #ifdef HAVE_RTREE_KEYS -int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, uchar *page_buf, my_off_t *new_page); -int maria_rtree_delete_key(MARIA_HA *info, uchar *page, uchar *key, - uint key_length, uint nod_flag); -int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uint key_length, my_off_t child_page); +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page); +int maria_rtree_delete_key(MARIA_HA *info, byte *page, byte *key, + uint key_length, uint nod_flag); +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, my_off_t child_page); #endif /*HAVE_RTREE_KEYS*/ #endif /* _rt_key_h */ diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c index 67b1d59f505..851618a4300 100644 --- a/storage/maria/ma_rt_mbr.c +++ b/storage/maria/ma_rt_mbr.c @@ -93,8 +93,9 @@ MBR_DATA(a,b) Data reference is the same Returns 0 on success. */ -int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *b, uchar *a, uint key_length, - uint nextflag) + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *b, byte *a, uint key_length, + uint nextflag) { for (; (int) key_length > 0; keyseg += 2 ) { @@ -153,7 +154,7 @@ int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *b, uchar *a, uint key_length, end: if (nextflag & MBR_DATA) { - uchar *end = a + keyseg->length; + byte *end = a + keyseg->length; do { if (*a++ != *b++) @@ -182,7 +183,7 @@ end: /* Calculates rectangle volume */ -double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar *a, uint key_length) +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte *a, uint key_length) { double res = 1; for (; (int)key_length > 0; keyseg += 2) @@ -263,7 +264,7 @@ double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar *a, uint key_length) Creates an MBR as an array of doubles. */ -int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res) +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, double *res) { for (; (int)key_length > 0; keyseg += 2) { @@ -352,7 +353,7 @@ int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res) Result is written to c */ -int maria_rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c, +int maria_rtree_combine_rect(HA_KEYSEG *keyseg, byte* a, byte* b, byte* c, uint key_length) { for ( ; (int) key_length > 0 ; keyseg += 2) @@ -443,7 +444,7 @@ int maria_rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c, /* Calculates overlapping area of two MBRs a & b */ -double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b, +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte* a, byte* b, uint key_length) { double res = 1; @@ -525,10 +526,11 @@ double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b, } /* -Calculates MBR_AREA(a+b) - MBR_AREA(a) + Calculates MBR_AREA(a+b) - MBR_AREA(a) */ -double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, - uint key_length, double *ab_area) + +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double *ab_area) { double a_area= 1.0; double loc_ab_area= 1.0; @@ -620,7 +622,7 @@ safe_end: /* Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a) */ -double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, uint key_length, double *ab_perim) { double a_perim = 0.0; @@ -731,16 +733,16 @@ double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, } /* -Calculates key page total MBR = MBR(key1) + MBR(key2) + ... + Calculates key page total MBR = MBR(key1) + MBR(key2) + ... */ -int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf, - uchar *c, uint key_length) +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte *c, uint key_length) { uint inc = 0; uint k_len = key_length; uint nod_flag = _ma_test_if_nod(page_buf); - uchar *k; - uchar *last = rt_PAGE_END(page_buf); + byte *k; + byte *last = rt_PAGE_END(page_buf); for (; (int)key_length > 0; keyseg += 2) { diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h index 81e2a6851d4..3282ee0d7a3 100644 --- a/storage/maria/ma_rt_mbr.h +++ b/storage/maria/ma_rt_mbr.h @@ -20,19 +20,20 @@ #ifdef HAVE_RTREE_KEYS -int maria_rtree_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, uint key_length, - uint nextflag); -int maria_rtree_combine_rect(HA_KEYSEG *keyseg,uchar *, uchar *, uchar*, - uint key_length); -double maria_rtree_rect_volume(HA_KEYSEG *keyseg, uchar*, uint key_length); -int maria_rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res); -double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar *a, uchar *b, - uint key_length); -double maria_rtree_area_increase(HA_KEYSEG *keyseg, uchar *a, uchar *b, - uint key_length, double *ab_area); -double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, uchar* a, uchar* b, - uint key_length, double *ab_perim); -int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, uchar *page_buf, - uchar* c, uint key_length); +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *a, byte *b, uint key_length, + uint nextflag); +int maria_rtree_combine_rect(HA_KEYSEG *keyseg,byte *, byte *, byte*, + uint key_length); +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte*, uint key_length); +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, + double *res); +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length); +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double *ab_area); +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length, double *ab_perim); +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte* c, uint key_length); #endif /*HAVE_RTREE_KEYS*/ #endif /* _rt_mbr_h */ diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c index 034799efd89..00c8d18f5e5 100644 --- a/storage/maria/ma_rt_split.c +++ b/storage/maria/ma_rt_split.c @@ -27,7 +27,7 @@ typedef struct { double square; int n_node; - uchar *key; + byte *key; double *coords; } SplitStruct; @@ -243,8 +243,9 @@ static int split_maria_rtree_node(SplitStruct *node, int n_entries, return 0; } -int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, uchar *key, - uint key_length, my_off_t *new_page_offs) +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, + uint key_length, my_off_t *new_page_offs) { int n1, n2; /* Number of items in groups */ @@ -255,8 +256,8 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, u double *next_coord; double *old_coord; int n_dim; - uchar *source_cur, *cur1, *cur2; - uchar *new_page; + byte *source_cur, *cur1, *cur2; + byte *new_page; int err_code= 0; uint nod_flag= _ma_test_if_nod(page); uint full_length= key_length + (nod_flag ? nod_flag : @@ -300,7 +301,7 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, u goto split_err; } - if (!(new_page = (uchar*)my_alloca((uint)keyinfo->block_length))) + if (!(new_page = (byte*) my_alloca((uint)keyinfo->block_length))) { err_code= -1; goto split_err; @@ -313,7 +314,7 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, u n1= n2 = 0; for (cur = task; cur < stop; ++cur) { - uchar *to; + byte *to; if (cur->n_node == 1) { to = cur1; diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c index ca4825c2ce2..04b0c88c222 100644 --- a/storage/maria/ma_rt_test.c +++ b/storage/maria/ma_rt_test.c @@ -153,10 +153,11 @@ static int run_test(const char *filename) create_info.max_rows=10000000; if (maria_create(filename, - 1, /* keys */ - keyinfo, - 1+2*ndims+opt_unique, /* columns */ - recinfo,uniques,&uniquedef,&create_info,create_flag)) + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 1+2*ndims+opt_unique, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) goto err; if (!silent) diff --git a/storage/maria/ma_scan.c b/storage/maria/ma_scan.c index c9c988722b7..4538c87e2be 100644 --- a/storage/maria/ma_scan.c +++ b/storage/maria/ma_scan.c @@ -21,26 +21,41 @@ int maria_scan_init(register MARIA_HA *info) { DBUG_ENTER("maria_scan_init"); - info->nextpos=info->s->pack.header_length; /* Read first record */ + + info->cur_row.nextpos= info->s->pack.header_length; /* Read first record */ info->lastinx= -1; /* Can't forward or backward */ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) DBUG_RETURN(my_errno); + + if ((*info->s->scan_init)(info)) + DBUG_RETURN(my_errno); DBUG_RETURN(0); } /* - Read a row based on position. - If filepos= HA_OFFSET_ERROR then read next row - Return values - Returns one of following values: - 0 = Ok. - HA_ERR_END_OF_FILE = EOF. + Read a row based on position. + + SYNOPSIS + maria_scan() + info Maria handler + record Read data here + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # Error code */ -int maria_scan(MARIA_HA *info, byte *buf) +int maria_scan(MARIA_HA *info, byte *record) { DBUG_ENTER("maria_scan"); /* Init all but update-flag */ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); - DBUG_RETURN ((*info->s->read_rnd)(info,buf,info->nextpos,1)); + DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1)); +} + + +void maria_scan_end(MARIA_HA *info) +{ + (*info->s->scan_end)(info); } diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c index af25be06a09..d8738ae4639 100644 --- a/storage/maria/ma_search.c +++ b/storage/maria/ma_search.c @@ -19,8 +19,9 @@ #include "ma_fulltext.h" #include "m_ctype.h" -static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, + byte *key, byte *keypos, uint *return_key_length); /* Check index */ @@ -55,31 +56,32 @@ int _ma_check_index(MARIA_HA *info, int inx) */ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *key, uint key_len, uint nextflag, register my_off_t pos) + byte *key, uint key_len, uint nextflag, register my_off_t pos) { my_bool last_key; int error,flag; uint nod_flag; - uchar *keypos,*maxpos; - uchar lastkey[HA_MAX_KEY_BUFF],*buff; + byte *keypos,*maxpos; + byte lastkey[HA_MAX_KEY_BUFF],*buff; DBUG_ENTER("_ma_search"); DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", - (ulong) pos, nextflag, (ulong) info->lastpos)); + (ulong) pos, nextflag, (ulong) info->cur_row.lastpos)); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len);); if (pos == HA_OFFSET_ERROR) { my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST))) DBUG_RETURN(-1); /* Not found ; return error */ DBUG_RETURN(1); /* Search at upper levels */ } - if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, test(!(nextflag & SEARCH_SAVE_BUFF))))) goto err; - DBUG_DUMP("page",(byte*) buff,maria_getint(buff)); + DBUG_DUMP("page", buff, maria_getint(buff)); flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, &keypos,lastkey, &last_key); @@ -118,9 +120,10 @@ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, } if (pos != info->last_keypage) { - uchar *old_buff=buff; - if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff, - test(!(nextflag & SEARCH_SAVE_BUFF))))) + byte *old_buff=buff; + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) goto err; keypos=buff+(keypos-old_buff); maxpos=buff+(maxpos-old_buff); @@ -133,8 +136,8 @@ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, &info->lastkey_length)) goto err; if (!(nextflag & SEARCH_SMALLER) && - ha_key_cmp(keyinfo->seg, info->lastkey, key, key_len, SEARCH_FIND, - not_used)) + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, key_len, + SEARCH_FIND, not_used)) { my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ goto err; @@ -147,22 +150,22 @@ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, goto err; memcpy(info->lastkey,lastkey,info->lastkey_length); } - info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); /* Save position for a possible read next / previous */ - info->int_keypos=info->buff+ (keypos-buff); - info->int_maxpos=info->buff+ (maxpos-buff); + info->int_keypos= info->keyread_buff+ (keypos-buff); + info->int_maxpos= info->keyread_buff+ (maxpos-buff); info->int_nod_flag=nod_flag; info->int_keytree_version=keyinfo->version; info->last_search_keypage=info->last_keypage; info->page_changed=0; - info->buff_used= (info->buff != buff); /* If we have to reread buff */ + info->keybuff_used= (info->keyread_buff != buff); /* If we have to reread */ - DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); DBUG_RETURN(0); err: DBUG_PRINT("exit",("Error: %d",my_errno)); - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed=1; DBUG_RETURN (-1); } /* _ma_search */ @@ -173,9 +176,9 @@ err: /* ret_pos point to where find or bigger key starts */ /* ARGSUSED */ -int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint key_len, uint comp_flag, uchar **ret_pos, - uchar *buff __attribute__((unused)), my_bool *last_key) +int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff __attribute__((unused)), my_bool *last_key) { reg4 int start,mid,end,save_end; int flag; @@ -192,16 +195,16 @@ int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, while (start != end) { mid= (start+end)/2; - if ((flag=ha_key_cmp(keyinfo->seg,page+(uint) mid*totlength,key,key_len, - comp_flag, not_used)) + if ((flag=ha_key_cmp(keyinfo->seg,(uchar*) page+(uint) mid*totlength, + (uchar*) key, key_len, comp_flag, not_used)) >= 0) end=mid; else start=mid+1; } if (mid != start) - flag=ha_key_cmp(keyinfo->seg,page+(uint) start*totlength,key,key_len, - comp_flag, not_used); + flag=ha_key_cmp(keyinfo->seg, (uchar*) page+(uint) start*totlength, + (uchar*) key, key_len, comp_flag, not_used); if (flag < 0) start++; /* point at next, bigger key */ *ret_pos=page+(uint) start*totlength; @@ -237,13 +240,13 @@ int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, < 0 Not found. */ -int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint key_len, uint comp_flag, uchar **ret_pos, - uchar *buff, my_bool *last_key) +int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *last_key) { int flag; uint nod_flag,length,not_used[2]; - uchar t_buff[HA_MAX_KEY_BUFF],*end; + byte t_buff[HA_MAX_KEY_BUFF],*end; DBUG_ENTER("_ma_seq_search"); LINT_INIT(flag); LINT_INIT(length); @@ -264,8 +267,8 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, length, (long) page, (long) end)); DBUG_RETURN(MARIA_FOUND_WRONG_KEY); } - if ((flag=ha_key_cmp(keyinfo->seg,t_buff,key,key_len,comp_flag, - not_used)) >= 0) + if ((flag= ha_key_cmp(keyinfo->seg, (uchar*) t_buff,(uchar*) key, + key_len,comp_flag, not_used)) >= 0) break; #ifdef EXTRA_DEBUG DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff, @@ -282,9 +285,9 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, } /* _ma_seq_search */ -int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint key_len, uint nextflag, uchar **ret_pos, - uchar *buff, my_bool *last_key) +int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, uint nextflag, + byte **ret_pos, byte *buff, my_bool *last_key) { /* my_flag is raw comparison result to be changed according to @@ -295,10 +298,11 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag uint nod_flag, length, len, matched, cmplen, kseg_len; uint prefix_len,suffix_len; int key_len_skip, seg_len_pack, key_len_left; - uchar *end, *kseg, *vseg; - uchar *sort_order=keyinfo->seg->charset->sort_order; - uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; - uchar *saved_from, *saved_to, *saved_vseg; + byte *end; + uchar *kseg, *vseg, *saved_vseg, *saved_from; + uchar *sort_order= keyinfo->seg->charset->sort_order; + byte tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + byte *saved_to; uint saved_length=0, saved_prefix_len=0; uint length_pack; DBUG_ENTER("_ma_prefix_search"); @@ -315,9 +319,9 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag nod_flag=_ma_test_if_nod(page); page+=2+nod_flag; *ret_pos=page; - kseg=key; + kseg= (uchar*) key; - get_key_pack_length(kseg_len,length_pack,kseg); + get_key_pack_length(kseg_len, length_pack, kseg); key_len_skip=length_pack+kseg_len; key_len_left=(int) key_len- (int) key_len_skip; /* If key_len is 0, then lenght_pack is 1, then key_len_left is -1. */ @@ -344,7 +348,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag { uint packed= *page & 128; - vseg=page; + vseg= (uchar*) page; if (keyinfo->seg->length >= 127) { suffix_len=mi_uint2korr(vseg) & 32767; @@ -387,7 +391,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, suffix_len,vseg)); { - uchar *from=vseg+suffix_len; + uchar *from= vseg+suffix_len; HA_KEYSEG *keyseg; uint l; @@ -408,9 +412,9 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag from+=l; } - from+=keyseg->length; - page=from+nod_flag; - length=from-vseg; + from+= keyseg->length; + page= (byte*) from+nod_flag; + length= (uint) (from-vseg); } if (page > end) @@ -427,7 +431,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag { /* We have to compare. But we can still skip part of the key */ uint left; - uchar *k=kseg+prefix_len; + uchar *k= kseg+prefix_len; /* If prefix_len > cmplen then we are in the end-space comparison @@ -477,7 +481,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag for ( ; k < end && *k == ' '; k++) ; if (k == end) goto cmp_rest; /* should never happen */ - if (*k < (uchar) ' ') + if ((uchar) *k < (uchar) ' ') { my_flag= 1; /* Compared string is smaller */ break; @@ -493,11 +497,11 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag /* We have to compare k and vseg as if they were space extended */ for (end=vseg + (len-cmplen) ; - vseg < end && *vseg == (uchar) ' '; + vseg < end && *vseg == (byte) ' '; vseg++, matched++) ; DBUG_ASSERT(vseg < end); - if (*vseg > (uchar) ' ') + if ((uchar) *vseg > (uchar) ' ') { my_flag= 1; /* Compared string is smaller */ break; @@ -534,8 +538,8 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag /* else (matched < prefix_len) ---> do nothing. */ memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); - saved_to=buff+saved_length; - saved_from=saved_vseg; + saved_to= buff+saved_length; + saved_from= saved_vseg; saved_length=length; *ret_pos=page; } @@ -544,12 +548,12 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag if (flag == 0) { memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); - saved_to=buff+saved_length; - saved_from=saved_vseg; + saved_to= buff+saved_length; + saved_from= saved_vseg; saved_length=length; } if (saved_length) - memcpy(saved_to,saved_from,saved_length); + memcpy(saved_to, (byte*) saved_from, saved_length); *last_key= page == end; @@ -560,7 +564,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *pag /* Get pos to a key_block */ -my_off_t _ma_kpos(uint nod_flag, uchar *after_key) +my_off_t _ma_kpos(uint nod_flag, byte *after_key) { after_key-=nod_flag; switch (nod_flag) { @@ -596,7 +600,7 @@ my_off_t _ma_kpos(uint nod_flag, uchar *after_key) /* Save pos to a key_block */ -void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos) +void _ma_kpointer(register MARIA_HA *info, register byte *buff, my_off_t pos) { pos/=MARIA_MIN_KEY_BLOCK_LENGTH; switch (info->s->base.key_reflength) { @@ -615,7 +619,7 @@ void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos) case 4: mi_int4store(buff,pos); break; case 3: mi_int3store(buff,pos); break; case 2: mi_int2store(buff,(uint) pos); break; - case 1: buff[0]= (uchar) pos; break; + case 1: buff[0]= (char) (uchar) pos; break; default: abort(); /* impossible */ } } /* _ma_kpointer */ @@ -624,7 +628,7 @@ void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos) /* Calc pos to a data-record from a key */ -my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, uchar *after_key) +my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const byte *after_key) { my_off_t pos; after_key-=(nod_flag + info->s->rec_reflength); @@ -646,15 +650,14 @@ my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, uchar *after_key) default: pos=0L; /* Shut compiler up */ } - return (info->s->options & - (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? pos : - pos*info->s->base.pack_reclength; + return ((info->s->data_file_type == STATIC_RECORD) ? + pos * info->s->base.pack_reclength : pos); } /* Calc position from a record pointer ( in delete link chain ) */ -my_off_t _ma_rec_pos(MARIA_SHARE *s, uchar *ptr) +my_off_t _ma_rec_pos(MARIA_SHARE *s, byte *ptr) { my_off_t pos; switch (s->rec_reflength) { @@ -704,20 +707,18 @@ my_off_t _ma_rec_pos(MARIA_SHARE *s, uchar *ptr) break; default: abort(); /* Impossible */ } - return ((s->options & - (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? pos : - pos*s->base.pack_reclength); + return ((s->data_file_type == STATIC_RECORD) ? + pos * s->base.pack_reclength : pos); } /* save position to record */ -void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos) +void _ma_dpointer(MARIA_HA *info, byte *buff, my_off_t pos) { - if (!(info->s->options & - (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) && + if (info->s->data_file_type == STATIC_RECORD && pos != HA_OFFSET_ERROR) - pos/=info->s->base.pack_reclength; + pos/= info->s->base.pack_reclength; switch (info->s->rec_reflength) { #if SIZEOF_OFF_T > 4 @@ -752,7 +753,7 @@ void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos) /* same as _ma_get_key but used with fixed length keys */ uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, - register uchar **page, register uchar *key) + register byte **page, register byte *key) { memcpy((byte*) key,(byte*) *page, (size_t) (keyinfo->keylength+nod_flag)); @@ -776,10 +777,10 @@ uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, */ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, - register uchar **page_pos, register uchar *key) + register byte **page_pos, register byte *key) { reg1 HA_KEYSEG *keyseg; - uchar *start_key,*page=*page_pos; + byte *start_key,*page=*page_pos; uint length; start_key=key; @@ -788,7 +789,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, if (keyseg->flag & HA_PACK_KEY) { /* key with length, packed to previous key */ - uchar *start=key; + byte *start= key; uint packed= *page & 128,tot_length,rest_length; if (keyseg->length >= 127) { @@ -834,7 +835,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, tot_length=rest_length+length; /* If the stored length has changed, we must move the key */ - if (tot_length >= 255 && *start != 255) + if (tot_length >= 255 && *start != (char) 255) { /* length prefix changed from a length of one to a length of 3 */ bmove_upp((char*) key+length+3,(char*) key+length+1,length); @@ -842,7 +843,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, mi_int2store(key+1,tot_length); key+=3+length; } - else if (tot_length < 255 && *start == 255) + else if (tot_length < 255 && *start == (char) 255) { bmove(key+1,key+3,length); *key=tot_length; @@ -891,7 +892,7 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) { - uchar *tmp=page; + byte *tmp=page; get_key_length(length,tmp); length+=(uint) (tmp-page); } @@ -913,10 +914,10 @@ uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, /* key that is packed relatively to previous */ uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, - register uchar **page_pos, register uchar *key) + register byte **page_pos, register byte *key) { reg1 HA_KEYSEG *keyseg; - uchar *start_key,*page,*page_end,*from,*from_end; + byte *start_key,*page,*page_end,*from,*from_end; uint length,tmp; DBUG_ENTER("_ma_get_binary_pack_key"); @@ -1018,8 +1019,8 @@ uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, /* Get key at position without knowledge of previous key */ /* Returns pointer to next key */ -uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, uint *return_key_length) +byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, byte *keypos, uint *return_key_length) { uint nod_flag; DBUG_ENTER("_ma_get_key"); @@ -1054,8 +1055,8 @@ uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, /* Get key at position without knowledge of previous key */ /* Returns 0 if ok */ -static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, uint *return_key_length) { uint nod_flag; @@ -1092,11 +1093,11 @@ static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *pa /* Get last key from key-page */ /* Return pointer to where key starts */ -uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *lastkey, uchar *endpos, uint *return_key_length) +byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *lastkey, byte *endpos, uint *return_key_length) { uint nod_flag; - uchar *lastpos; + byte *lastpos; DBUG_ENTER("_ma_get_last_key"); DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page, (long) endpos)); @@ -1135,15 +1136,15 @@ uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *page, /* Calculate length of key */ -uint _ma_keylength(MARIA_KEYDEF *keyinfo, register uchar *key) +uint _ma_keylength(MARIA_KEYDEF *keyinfo, register const byte *key) { reg1 HA_KEYSEG *keyseg; - uchar *start; + const byte *start; if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) return (keyinfo->keylength); - start=key; + start= key; for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) { if (keyseg->flag & HA_NULL_PART) @@ -1170,11 +1171,11 @@ uint _ma_keylength(MARIA_KEYDEF *keyinfo, register uchar *key) after '0xDF' but find 'ss' */ -uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register uchar *key, +uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, HA_KEYSEG *end) { reg1 HA_KEYSEG *keyseg; - uchar *start= key; + const byte *start= key; for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++) { @@ -1193,29 +1194,35 @@ uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register uchar *key, return (uint) (key-start); } - /* Move a key */ -uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, uchar *from) +/* Move a key */ + +byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from) { reg1 uint length; - memcpy((byte*) to, (byte*) from, - (size_t) (length= _ma_keylength(keyinfo,from))); + memcpy(to, from, (size_t) (length= _ma_keylength(keyinfo, from))); return to+length; } - /* Find next/previous record with same key */ - /* This can't be used when database is touched after last read */ + +/* + Find next/previous record with same key + + WARNING + This can't be used when database is touched after last read +*/ int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, uint nextflag, my_off_t pos) + byte *key, uint key_length, uint nextflag, my_off_t pos) { int error; uint nod_flag; - uchar lastkey[HA_MAX_KEY_BUFF]; + byte lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("_ma_search_next"); - DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu", - nextflag, (ulong) info->lastpos, - (ulong) info->int_keypos)); + DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu page_changed %d keybuff_used: %d", + nextflag, (ulong) info->cur_row.lastpos, + (ulong) info->int_keypos, + info->page_changed, info->keybuff_used)); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length);); /* Force full read if we are at last key or if we are not on a leaf @@ -1228,20 +1235,20 @@ int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) || info->page_changed || (info->int_keytree_version != keyinfo->version && - (info->int_nod_flag || info->buff_used))) + (info->int_nod_flag || info->keybuff_used))) DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, nextflag | SEARCH_SAVE_BUFF, pos)); - if (info->buff_used) + if (info->keybuff_used) { if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage, - DFLT_INIT_HITS,info->buff,0)) + DFLT_INIT_HITS,info->keyread_buff,0)) DBUG_RETURN(-1); - info->buff_used=0; + info->keybuff_used=0; } - /* Last used buffer is in info->buff */ - nod_flag=_ma_test_if_nod(info->buff); + /* Last used buffer is in info->keyread_buff */ + nod_flag=_ma_test_if_nod(info->keyread_buff); if (nextflag & SEARCH_BIGGER) /* Next key */ { @@ -1261,11 +1268,11 @@ int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, { uint length; /* Find start of previous key */ - info->int_keypos= _ma_get_last_key(info,keyinfo,info->buff,lastkey, + info->int_keypos= _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, info->int_keypos, &length); if (!info->int_keypos) DBUG_RETURN(-1); - if (info->int_keypos == info->buff+2) + if (info->int_keypos == info->keyread_buff+2) DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, nextflag | SEARCH_SAVE_BUFF, pos)); if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, @@ -1274,84 +1281,84 @@ int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, DBUG_RETURN(error); /* QQ: We should be able to optimize away the following call */ - if (! _ma_get_last_key(info,keyinfo,info->buff,lastkey, + if (! _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, info->int_keypos,&info->lastkey_length)) DBUG_RETURN(-1); } memcpy(info->lastkey,lastkey,info->lastkey_length); - info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); - DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); DBUG_RETURN(0); } /* _ma_search_next */ /* Search after position for the first row in an index */ - /* This is stored in info->lastpos */ + /* This is stored in info->cur_row.lastpos */ int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, register my_off_t pos) { uint nod_flag; - uchar *page; + byte *page; DBUG_ENTER("_ma_search_first"); if (pos == HA_OFFSET_ERROR) { my_errno=HA_ERR_KEY_NOT_FOUND; - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); } do { - if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,0)) + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->keyread_buff,0)) { - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); } - nod_flag=_ma_test_if_nod(info->buff); - page=info->buff+2+nod_flag; + nod_flag=_ma_test_if_nod(info->keyread_buff); + page=info->keyread_buff+2+nod_flag; } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, info->lastkey))) DBUG_RETURN(-1); /* Crashed */ - info->int_keypos=page; info->int_maxpos=info->buff+maria_getint(info->buff)-1; + info->int_keypos=page; info->int_maxpos=info->keyread_buff+maria_getint(info->keyread_buff)-1; info->int_nod_flag=nod_flag; info->int_keytree_version=keyinfo->version; info->last_search_keypage=info->last_keypage; - info->page_changed=info->buff_used=0; - info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->page_changed=info->keybuff_used=0; + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); - DBUG_PRINT("exit",("found key at %lu", (ulong) info->lastpos)); + DBUG_PRINT("exit",("found key at %lu", (ulong) info->cur_row.lastpos)); DBUG_RETURN(0); } /* _ma_search_first */ /* Search after position for the last row in an index */ - /* This is stored in info->lastpos */ + /* This is stored in info->cur_row.lastpos */ int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, register my_off_t pos) { uint nod_flag; - uchar *buff,*page; + byte *buff,*page; DBUG_ENTER("_ma_search_last"); if (pos == HA_OFFSET_ERROR) { my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); } - buff=info->buff; + buff=info->keyread_buff; do { if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0)) { - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; DBUG_RETURN(-1); } page= buff+maria_getint(buff); @@ -1361,14 +1368,14 @@ int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (!_ma_get_last_key(info,keyinfo,buff,info->lastkey,page, &info->lastkey_length)) DBUG_RETURN(-1); - info->lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); info->int_keypos=info->int_maxpos=page; info->int_nod_flag=nod_flag; info->int_keytree_version=keyinfo->version; info->last_search_keypage=info->last_keypage; - info->page_changed=info->buff_used=0; + info->page_changed=info->keybuff_used=0; - DBUG_PRINT("exit",("found key at %lu",(ulong) info->lastpos)); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); DBUG_RETURN(0); } /* _ma_search_last */ @@ -1391,12 +1398,12 @@ int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, - uchar *next_pos __attribute__((unused)), - uchar *org_key __attribute__((unused)), - uchar *prev_key __attribute__((unused)), - uchar *key, MARIA_KEY_PARAM *s_temp) + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) { - s_temp->key=key; + s_temp->key= key; return (int) (s_temp->totlength=keyinfo->keylength+nod_flag); } @@ -1404,18 +1411,18 @@ _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, - uchar *next_pos __attribute__((unused)), - uchar *org_key __attribute__((unused)), - uchar *prev_key __attribute__((unused)), - uchar *key, MARIA_KEY_PARAM *s_temp) + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) { - s_temp->key=key; + s_temp->key= key; return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag); } /* length of key with a variable length first segment which is prefix - compressed (mariachk reports 'packed + stripped') + compressed (maria_chk reports 'packed + stripped') Keys are compressed the following way: @@ -1434,15 +1441,16 @@ _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar *next_key, - uchar *org_key, uchar *prev_key, uchar *key, + byte *next_key, + byte *org_key, byte *prev_key, const byte *key, MARIA_KEY_PARAM *s_temp) { reg1 HA_KEYSEG *keyseg; int length; uint key_length,ref_length,org_key_length=0, length_pack,new_key_length,diff_flag,pack_marker; - uchar *start,*end,*key_end,*sort_order; + const byte *start,*end,*key_end; + uchar *sort_order; bool same_length; length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0; @@ -1455,7 +1463,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, (keyseg->type == HA_KEYTYPE_VARTEXT1) || (keyseg->type == HA_KEYTYPE_VARTEXT2)) && !use_strnxfrm(keyseg->charset)) - sort_order=keyseg->charset->sort_order; + sort_order= keyseg->charset->sort_order; /* diff flag contains how many bytes is needed to pack key */ if (keyseg->length >= 127) @@ -1475,10 +1483,10 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, { if (!*key++) { - s_temp->key=key; - s_temp->key_length=0; - s_temp->totlength=key_length-1+diff_flag; - s_temp->next_key_pos=0; /* No next key */ + s_temp->key= key; + s_temp->key_length= 0; + s_temp->totlength= key_length-1+diff_flag; + s_temp->next_key_pos= 0; /* No next key */ return (s_temp->totlength); } s_temp->store_not_null=1; @@ -1490,13 +1498,13 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, } else s_temp->store_not_null=0; - s_temp->prev_key=org_key; + s_temp->prev_key= org_key; /* The key part will start with a packed length */ get_key_pack_length(new_key_length,length_pack,key); - end=key_end= key+ new_key_length; - start=key; + end= key_end= key+ new_key_length; + start= key; /* Calc how many characters are identical between this and the prev. key */ if (prev_key) @@ -1507,11 +1515,12 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, if (new_key_length && new_key_length == org_key_length) same_length=1; else if (new_key_length > org_key_length) - end=key + org_key_length; + end= key + org_key_length; if (sort_order) /* SerG */ { - while (key < end && sort_order[*key] == sort_order[*prev_key]) + while (key < end && + sort_order[* (uchar*) key] == sort_order[* (uchar*) prev_key]) { key++; prev_key++; } @@ -1592,7 +1601,8 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, key=start; if (sort_order) /* SerG */ { - while (key < end && sort_order[*key] == sort_order[*org_key]) + while (key < end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) org_key]) { key++; org_key++; } @@ -1672,11 +1682,11 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, uint tmp_length; key=(start+=ref_length); if (key+n_length < key_end) /* Normalize length based */ - key_end=key+n_length; + key_end= key+n_length; if (sort_order) /* SerG */ { - while (key < key_end && sort_order[*key] == - sort_order[*next_key]) + while (key < key_end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) next_key]) { key++; next_key++; } @@ -1716,8 +1726,9 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, /* Length of key which is prefix compressed */ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar *next_key, - uchar *org_key, uchar *prev_key, uchar *key, + byte *next_key, + byte *org_key, byte *prev_key, + const byte *key, MARIA_KEY_PARAM *s_temp) { uint length,key_length,ref_length; @@ -1732,10 +1743,10 @@ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, { /* pack key against previous key */ /* - As keys may be identical when running a sort in mariachk, we + As keys may be identical when running a sort in maria_chk, we have to guard against the case where keys may be identical */ - uchar *end; + const byte *end; end=key+key_length; for ( ; *key == *prev_key && key < end; key++,prev_key++) ; s_temp->ref_length= ref_length=(uint) (key-s_temp->key); @@ -1756,7 +1767,7 @@ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, /* If first key and next key is packed (only on delete) */ if (!prev_key && org_key && next_length) { - uchar *end; + const byte *end; for (key= s_temp->key, end=key+next_length ; *key == *org_key && key < end; key++,org_key++) ; @@ -1798,7 +1809,7 @@ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, /* store key without compression */ void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), - register uchar *key_pos, + register byte *key_pos, register MARIA_KEY_PARAM *s_temp) { memcpy((byte*) key_pos,(byte*) s_temp->key,(size_t) s_temp->totlength); @@ -1813,11 +1824,11 @@ void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), - register uchar *key_pos, + register byte *key_pos, register MARIA_KEY_PARAM *s_temp) { uint length; - uchar *start; + byte *start; start=key_pos; @@ -1876,7 +1887,7 @@ void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), /* variable length key with prefix compression */ void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), - register uchar *key_pos, + register byte *key_pos, register MARIA_KEY_PARAM *s_temp) { store_key_length_inc(key_pos,s_temp->ref_length); diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index 5ae23c37261..3859bd96149 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -44,31 +44,31 @@ extern void print_error _VARARGS((const char *fmt,...)); /* Functions defined in this file */ static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info,uint keys, - uchar **sort_keys, + byte **sort_keys, DYNAMIC_ARRAY *buffpek,int *maxbuffer, IO_CACHE *tempfile, IO_CACHE *tempfile_for_exceptions); -static int NEAR_F write_keys(MARIA_SORT_PARAM *info,uchar **sort_keys, +static int NEAR_F write_keys(MARIA_SORT_PARAM *info, byte **sort_keys, uint count, BUFFPEK *buffpek,IO_CACHE *tempfile); -static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, IO_CACHE *tempfile); -static int NEAR_F write_index(MARIA_SORT_PARAM *info,uchar * *sort_keys, +static int NEAR_F write_index(MARIA_SORT_PARAM *info, byte **sort_keys, uint count); static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info,uint keys, - uchar * *sort_keys, + byte **sort_keys, BUFFPEK *buffpek,int *maxbuffer, IO_CACHE *t_file); static uint NEAR_F read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek, uint sort_length); static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info,uint keys, IO_CACHE *from_file, IO_CACHE *to_file, - uchar * *sort_keys, BUFFPEK *lastbuff, + byte **sort_keys, BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb); -static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint,uchar **,BUFFPEK *, int, +static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint, byte **,BUFFPEK *, int, IO_CACHE *); static int flush_maria_ft_buf(MARIA_SORT_PARAM *info); -static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info,uchar **sort_keys, +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, byte **sort_keys, uint count, BUFFPEK *buffpek, IO_CACHE *tempfile); static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, @@ -96,27 +96,27 @@ my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs); <> 0 Error */ -int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, - ulong sortbuff_size) +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong sortbuff_size) { int error,maxbuffer,skr; uint memavl,old_memavl,keys,sort_length; DYNAMIC_ARRAY buffpek; ha_rows records; - uchar **sort_keys; + byte **sort_keys; IO_CACHE tempfile, tempfile_for_exceptions; DBUG_ENTER("_ma_create_index_by_sort"); DBUG_PRINT("enter",("sort_length: %d", info->key_length)); if (info->keyinfo->flag & HA_VAR_LENGTH_KEY) { - info->write_keys=write_keys_varlen; + info->write_keys= write_keys_varlen; info->read_to_buffer=read_to_buffer_varlen; info->write_key=write_merge_key_varlen; } else { - info->write_keys=write_keys; + info->write_keys= write_keys; info->read_to_buffer=read_to_buffer; info->write_key=write_merge_key; } @@ -124,7 +124,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, my_b_clear(&tempfile); my_b_clear(&tempfile_for_exceptions); bzero((char*) &buffpek,sizeof(buffpek)); - sort_keys= (uchar **) NULL; error= 1; + sort_keys= (byte **) NULL; error= 1; maxbuffer=1; memavl=max(sortbuff_size,MIN_SORT_MEMORY); @@ -152,8 +152,8 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, } while ((maxbuffer= (int) (records/(keys-1)+1)) != skr); - if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ - HA_FT_MAXBYTELEN, MYF(0)))) + if ((sort_keys=(byte**) my_malloc(keys*(sort_length+sizeof(char*))+ + HA_FT_MAXBYTELEN, MYF(0)))) { if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer, maxbuffer/2)) @@ -230,7 +230,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info,my_bool no_messages, && !my_b_read(&tempfile_for_exceptions,(byte*)sort_keys, (uint) key_length)) { - if (_ma_ck_write(index,keyno,(uchar*) sort_keys,key_length-ref_length)) + if (_ma_ck_write(index,keyno,(byte*) sort_keys,key_length-ref_length)) goto err; } } @@ -251,7 +251,7 @@ err: /* Search after all keys and place them in a temp. file */ static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, - uchar **sort_keys, DYNAMIC_ARRAY *buffpek, + byte **sort_keys, DYNAMIC_ARRAY *buffpek, int *maxbuffer, IO_CACHE *tempfile, IO_CACHE *tempfile_for_exceptions) { @@ -260,7 +260,7 @@ static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, DBUG_ENTER("find_all_keys"); idx=error=0; - sort_keys[0]=(uchar*) (sort_keys+keys); + sort_keys[0]= (byte*) (sort_keys+keys); while (!(error=(*info->key_read)(info,sort_keys[idx]))) { @@ -277,7 +277,7 @@ static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, tempfile)) DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ - sort_keys[0]=(uchar*) (sort_keys+keys); + sort_keys[0]=(byte*) (sort_keys+keys); memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); idx=1; } @@ -308,7 +308,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) int error; uint memavl,old_memavl,keys,sort_length; uint idx, maxbuffer; - uchar **sort_keys=0; + byte **sort_keys= 0; LINT_INIT(keys); @@ -336,7 +336,6 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) my_b_clear(&info->tempfile_for_exceptions); bzero((char*) &info->buffpek,sizeof(info->buffpek)); bzero((char*) &info->unique, sizeof(info->unique)); - sort_keys= (uchar **) NULL; memavl=max(info->sortbuff_size, MIN_SORT_MEMORY); idx= info->sort_info->max_records; @@ -365,15 +364,15 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) } while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); } - if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ - ((info->keyinfo->flag & HA_FULLTEXT) ? - HA_FT_MAXBYTELEN : 0), MYF(0)))) + if ((sort_keys=(byte**) my_malloc(keys*(sort_length+sizeof(char*))+ + ((info->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) { if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK), maxbuffer, maxbuffer/2)) { my_free((gptr) sort_keys,MYF(0)); - sort_keys= (uchar **) NULL; /* for err: label */ + sort_keys= (byte**) NULL; /* for err: label */ } else break; @@ -393,7 +392,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) info->sort_keys=sort_keys; idx=error=0; - sort_keys[0]=(uchar*) (sort_keys+keys); + sort_keys[0]=(byte*) (sort_keys+keys); while (!(error=info->sort_info->got_error) && !(error=(*info->key_read)(info,sort_keys[idx]))) @@ -411,7 +410,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) (BUFFPEK *)alloc_dynamic(&info->buffpek), &info->tempfile)) goto err; - sort_keys[0]=(uchar*) (sort_keys+keys); + sort_keys[0]=(byte*) (sort_keys+keys); memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); idx=1; } @@ -422,7 +421,8 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) if (info->buffpek.elements) { if (info->write_keys(info,sort_keys, idx, - (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) + (BUFFPEK *) alloc_dynamic(&info->buffpek), + &info->tempfile)) goto err; info->keys=(info->buffpek.elements-1)*(keys-1)+idx; } @@ -434,8 +434,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg) err: info->sort_info->got_error=1; /* no need to protect this with a mutex */ - if (sort_keys) - my_free((gptr) sort_keys,MYF(0)); + my_free((gptr) sort_keys, MYF(MY_ALLOW_ZERO_PTR)); info->sort_keys=0; delete_dynamic(& info->buffpek); close_cached_file(&info->tempfile); @@ -499,8 +498,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) } } my_free((gptr) sinfo->sort_keys,MYF(0)); - my_free(_ma_get_rec_buff_ptr(info, sinfo->rec_buff), - MYF(MY_ALLOW_ZERO_PTR)); + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); sinfo->sort_keys=0; } @@ -548,7 +546,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) { if (param->testflag & T_VERBOSE) printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys); - if (merge_many_buff(sinfo, keys, (uchar **)mergebuf, + if (merge_many_buff(sinfo, keys, (byte **) mergebuf, dynamic_element(&sinfo->buffpek, 0, BUFFPEK *), (int*) &maxbuffer, &sinfo->tempfile)) { @@ -564,7 +562,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) } if (param->testflag & T_VERBOSE) printf("Key %d - Last merge and dumping keys\n", sinfo->key+1); - if (merge_index(sinfo, keys, (uchar **)mergebuf, + if (merge_index(sinfo, keys, (byte**) mergebuf, dynamic_element(&sinfo->buffpek,0,BUFFPEK *), maxbuffer,&sinfo->tempfile) || flush_maria_ft_buf(sinfo) || @@ -596,7 +594,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) if (key_length > sizeof(maria_ft_buf) || my_b_read(&sinfo->tempfile_for_exceptions, (byte*)maria_ft_buf, (uint)key_length) || - _ma_ck_write(info, sinfo->key, (uchar*)maria_ft_buf, + _ma_ck_write(info, sinfo->key, maria_ft_buf, key_length - info->s->rec_reflength)) got_error=1; } @@ -607,12 +605,14 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) } #endif /* THREAD */ - /* Write all keys in memory to file for later merge */ -static int NEAR_F write_keys(MARIA_SORT_PARAM *info, register uchar **sort_keys, +/* Write all keys in memory to file for later merge */ + +static int NEAR_F write_keys(MARIA_SORT_PARAM *info, + register byte **sort_keys, uint count, BUFFPEK *buffpek, IO_CACHE *tempfile) { - uchar **end; + byte **end; uint sort_length=info->key_length; DBUG_ENTER("write_keys"); @@ -628,7 +628,7 @@ static int NEAR_F write_keys(MARIA_SORT_PARAM *info, register uchar **sort_keys, for (end=sort_keys+count ; sort_keys != end ; sort_keys++) { - if (my_b_write(tempfile,(byte*) *sort_keys,(uint) sort_length)) + if (my_b_write(tempfile, *sort_keys, (uint) sort_length)) DBUG_RETURN(1); /* purecov: inspected */ } DBUG_RETURN(0); @@ -639,7 +639,7 @@ static inline int my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) { int err; - uint16 len = _ma_keylength(info->keyinfo, (uchar*) bufs); + uint16 len= _ma_keylength(info->keyinfo, bufs); /* The following is safe as this is a local file */ if ((err= my_b_write(to_file, (byte*)&len, sizeof(len)))) @@ -651,11 +651,11 @@ my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, - register uchar **sort_keys, + register byte **sort_keys, uint count, BUFFPEK *buffpek, IO_CACHE *tempfile) { - uchar **end; + byte **end; int err; DBUG_ENTER("write_keys_varlen"); @@ -670,14 +670,14 @@ static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, buffpek->count=count; for (end=sort_keys+count ; sort_keys != end ; sort_keys++) { - if ((err= my_var_write(info,tempfile, (byte*) *sort_keys))) + if ((err= my_var_write(info,tempfile, *sort_keys))) DBUG_RETURN(err); } DBUG_RETURN(0); } /* write_keys_varlen */ -static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, IO_CACHE *tempfile) { uint key_length=info->real_key_length; @@ -688,8 +688,8 @@ static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) DBUG_RETURN(1); - if (my_b_write(tempfile,(byte*)&key_length,sizeof(key_length)) || - my_b_write(tempfile,(byte*)key,(uint) key_length)) + if (my_b_write(tempfile, (byte*)&key_length,sizeof(key_length)) || + my_b_write(tempfile, key, (uint) key_length)) DBUG_RETURN(1); DBUG_RETURN(0); } /* write_key */ @@ -697,7 +697,8 @@ static int NEAR_F write_key(MARIA_SORT_PARAM *info, uchar *key, /* Write index */ -static int NEAR_F write_index(MARIA_SORT_PARAM *info, register uchar **sort_keys, +static int NEAR_F write_index(MARIA_SORT_PARAM *info, + register byte **sort_keys, register uint count) { DBUG_ENTER("write_index"); @@ -706,7 +707,7 @@ static int NEAR_F write_index(MARIA_SORT_PARAM *info, register uchar **sort_keys (qsort2_cmp) info->key_cmp,info); while (count--) { - if ((*info->key_write)(info,*sort_keys++)) + if ((*info->key_write)(info, *sort_keys++)) DBUG_RETURN(-1); /* purecov: inspected */ } DBUG_RETURN(0); @@ -716,7 +717,7 @@ static int NEAR_F write_index(MARIA_SORT_PARAM *info, register uchar **sort_keys /* Merge buffers to make < MERGEBUFF2 buffers */ static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info, uint keys, - uchar **sort_keys, BUFFPEK *buffpek, + byte **sort_keys, BUFFPEK *buffpek, int *maxbuffer, IO_CACHE *t_file) { register int i; @@ -797,11 +798,11 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, register uint count; uint16 length_of_key = 0; uint idx; - uchar *buffp; + byte *buffp; if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) { - buffp = buffpek->base; + buffp= buffpek->base; for (idx=1;idx<=count;idx++) { @@ -855,18 +856,17 @@ static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)) static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, - IO_CACHE *to_file, uchar **sort_keys, BUFFPEK *lastbuff, + IO_CACHE *to_file, byte **sort_keys, BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb) { int error; uint sort_length,maxcount; ha_rows count; my_off_t to_start_filepos; - uchar *strpos; + byte *strpos; BUFFPEK *buffpek,**refpek; QUEUE queue; volatile int *killed= _ma_killed_ptr(info->sort_info->param); - DBUG_ENTER("merge_buffers"); count=error=0; @@ -874,7 +874,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, LINT_INIT(to_start_filepos); if (to_file) to_start_filepos=my_b_tell(to_file); - strpos=(uchar*) sort_keys; + strpos= (byte*) sort_keys; sort_length=info->key_length; if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0, @@ -923,7 +923,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, { if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) { - uchar *base=buffpek->base; + byte *base= buffpek->base; uint max_keys=buffpek->max_keys; VOID(queue_remove(&queue,0)); @@ -955,7 +955,7 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, } } buffpek=(BUFFPEK*) queue_top(&queue); - buffpek->base=(uchar *) sort_keys; + buffpek->base= (byte*) sort_keys; buffpek->max_keys=keys; do { @@ -969,21 +969,21 @@ merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, } else { - register uchar *end; + register byte *end; strpos= buffpek->key; - for (end=strpos+buffpek->mem_count*sort_length; + for (end= strpos+buffpek->mem_count*sort_length; strpos != end ; strpos+=sort_length) { - if ((*info->key_write)(info,(void*) strpos)) + if ((*info->key_write)(info, (byte*) strpos)) { error=1; goto err; /* purecov: inspected */ } } } } - while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != -1 && - error != 0); + while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != + -1 && error != 0); lastbuff->count=count; if (to_file) @@ -997,7 +997,7 @@ err: /* Do a merge to output-file (save only positions) */ static int NEAR_F -merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys, +merge_index(MARIA_SORT_PARAM *info, uint keys, byte **sort_keys, BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile) { DBUG_ENTER("merge_index"); @@ -1007,8 +1007,8 @@ merge_index(MARIA_SORT_PARAM *info, uint keys, uchar **sort_keys, DBUG_RETURN(0); } /* merge_index */ -static int -flush_maria_ft_buf(MARIA_SORT_PARAM *info) + +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info) { int err=0; if (info->sort_info->ft_buf) diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h index a7e282f0ddc..8b9dd204ded 100644 --- a/storage/maria/ma_sp_defs.h +++ b/storage/maria/ma_sp_defs.h @@ -41,7 +41,7 @@ enum wkbByteOrder wkbNDR = 1 /* Little Endian */ }; -uint sp_make_key(register MARIA_HA *info, uint keynr, uchar *key, +uint sp_make_key(register MARIA_HA *info, uint keynr, byte *key, const byte *record, my_off_t filepos); #endif /*HAVE_SPATIAL*/ diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c index b9841fed1e7..79345550dd9 100644 --- a/storage/maria/ma_sp_key.c +++ b/storage/maria/ma_sp_key.c @@ -37,7 +37,7 @@ static void get_double(double *d, const byte *pos) float8get(*d, pos); } -uint sp_make_key(register MARIA_HA *info, uint keynr, uchar *key, +uint sp_make_key(register MARIA_HA *info, uint keynr, byte *key, const byte *record, my_off_t filepos) { HA_KEYSEG *keyseg; diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c index ea812974c8c..1ac1a74d7d7 100644 --- a/storage/maria/ma_sp_test.c +++ b/storage/maria/ma_sp_test.c @@ -109,10 +109,11 @@ int run_test(const char *filename) create_info.max_rows=10000000; if (maria_create(filename, - 1, /* keys */ - keyinfo, - 2, /* columns */ - recinfo,uniques,&uniquedef,&create_info,create_flag)) + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 2, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) goto err; if (!silent) diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index 511c5507aaf..c5580e1e981 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -25,12 +25,13 @@ LIST *maria_open_list=0; uchar NEAR maria_file_magic[]= -{ (uchar) 254, (uchar) 254,'\007', '\001', }; +{ (uchar) 254, (uchar) 254, (uchar) 9, '\001', }; uchar NEAR maria_pack_file_magic[]= -{ (uchar) 254, (uchar) 254,'\010', '\002', }; +{ (uchar) 254, (uchar) 254, (uchar) 10, '\001', }; uint maria_quick_table_bits=9; ulong maria_block_size= MARIA_KEY_BLOCK_LENGTH; -my_bool maria_flush=0, maria_delay_key_write=0, maria_single_user=0; +my_bool maria_flush= 0, maria_single_user= 0; +my_bool maria_delay_key_write= 0; #if defined(THREAD) && !defined(DONT_USE_RW_LOCKS) ulong maria_concurrent_insert= 2; #else @@ -38,11 +39,14 @@ ulong maria_concurrent_insert= 0; #endif my_off_t maria_max_temp_length= MAX_FILE_SIZE; ulong maria_bulk_insert_tree_size=8192*1024; -ulong maria_data_pointer_size=4; +ulong maria_data_pointer_size= 4; KEY_CACHE maria_key_cache_var; KEY_CACHE *maria_key_cache= &maria_key_cache_var; +/* Enough for comparing if number is zero */ +byte maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + /* read_vec[] is used for converting between P_READ_KEY.. and SEARCH_ Position is , == , >= , <= , > , < diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c index 0aef24f40a9..11049d6f279 100644 --- a/storage/maria/ma_statrec.c +++ b/storage/maria/ma_statrec.c @@ -19,9 +19,9 @@ #include "maria_def.h" -int _ma_write_static_record(MARIA_HA *info, const byte *record) +my_bool _ma_write_static_record(MARIA_HA *info, const byte *record) { - uchar temp[8]; /* max pointer length */ + byte temp[8]; /* max pointer length */ if (info->s->state.dellink != HA_OFFSET_ERROR && !info->append_insert_at_end) { @@ -86,7 +86,8 @@ int _ma_write_static_record(MARIA_HA *info, const byte *record) return 1; } -int _ma_update_static_record(MARIA_HA *info, my_off_t pos, const byte *record) +my_bool _ma_update_static_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) { info->rec_cache.seek_not_done=1; /* We have done a seek */ return (info->s->file_write(info, @@ -96,22 +97,22 @@ int _ma_update_static_record(MARIA_HA *info, my_off_t pos, const byte *record) } -int _ma_delete_static_record(MARIA_HA *info) +my_bool _ma_delete_static_record(MARIA_HA *info) { - uchar temp[9]; /* 1+sizeof(uint32) */ - + byte temp[9]; /* 1+sizeof(uint32) */ info->state->del++; info->state->empty+=info->s->base.pack_reclength; temp[0]= '\0'; /* Mark that record is deleted */ _ma_dpointer(info,temp+1,info->s->state.dellink); - info->s->state.dellink = info->lastpos; + info->s->state.dellink= info->cur_row.lastpos; info->rec_cache.seek_not_done=1; - return (info->s->file_write(info,(byte*) temp, 1+info->s->rec_reflength, - info->lastpos, MYF(MY_NABP)) != 0); + return (info->s->file_write(info, temp, 1+info->s->rec_reflength, + info->cur_row.lastpos, MYF(MY_NABP)) != 0); } -int _ma_cmp_static_record(register MARIA_HA *info, register const byte *old) +my_bool _ma_cmp_static_record(register MARIA_HA *info, + register const byte *old) { DBUG_ENTER("_ma_cmp_static_record"); @@ -122,7 +123,7 @@ int _ma_cmp_static_record(register MARIA_HA *info, register const byte *old) { if (flush_io_cache(&info->rec_cache)) { - DBUG_RETURN(-1); + DBUG_RETURN(1); } info->rec_cache.seek_not_done=1; /* We have done a seek */ } @@ -130,10 +131,11 @@ int _ma_cmp_static_record(register MARIA_HA *info, register const byte *old) if ((info->opt_flag & READ_CHECK_USED)) { /* If check isn't disabled */ info->rec_cache.seek_not_done=1; /* We have done a seek */ - if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, - info->lastpos, - MYF(MY_NABP))) - DBUG_RETURN(-1); + if (info->s->file_read(info, (char*) info->rec_buff, + info->s->base.reclength, + info->cur_row.lastpos, + MYF(MY_NABP))) + DBUG_RETURN(1); if (memcmp((byte*) info->rec_buff, (byte*) old, (uint) info->s->base.reclength)) { @@ -147,27 +149,31 @@ int _ma_cmp_static_record(register MARIA_HA *info, register const byte *old) } -int _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, - const byte *record, my_off_t pos) +my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) { DBUG_ENTER("_ma_cmp_static_unique"); info->rec_cache.seek_not_done=1; /* We have done a seek */ if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, pos, MYF(MY_NABP))) - DBUG_RETURN(-1); - DBUG_RETURN(_ma_unique_comp(def, record, info->rec_buff, - def->null_are_equal)); + DBUG_RETURN(1); + DBUG_RETURN(_ma_unique_comp(def, record, (byte*) info->rec_buff, + def->null_are_equal)); } - /* Read a fixed-length-record */ - /* Returns 0 if Ok. */ - /* 1 if record is deleted */ - /* MY_FILE_ERROR on read-error or locking-error */ +/* + Read a fixed-length-record -int _ma_read_static_record(register MARIA_HA *info, register my_off_t pos, - register byte *record) + RETURN + 0 Ok + 1 record delete + -1 on read-error or locking-error +*/ + +int _ma_read_static_record(register MARIA_HA *info, register byte *record, + MARIA_RECORD_POS pos) { int error; @@ -180,7 +186,7 @@ int _ma_read_static_record(register MARIA_HA *info, register my_off_t pos, info->rec_cache.seek_not_done=1; /* We have done a seek */ error=info->s->file_read(info,(char*) record,info->s->base.reclength, - pos,MYF(MY_NABP)) != 0; + pos, MYF(MY_NABP)) != 0; fast_ma_writeinfo(info); if (! error) { @@ -201,8 +207,8 @@ int _ma_read_static_record(register MARIA_HA *info, register my_off_t pos, int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, - register my_off_t filepos, - my_bool skip_deleted_blocks) + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) { int locked,error,cache_read; uint cache_length; @@ -211,10 +217,6 @@ int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, cache_read=0; cache_length=0; - if (info->opt_flag & WRITE_CACHE_USED && - (info->rec_cache.pos_in_file <= filepos || skip_deleted_blocks) && - flush_io_cache(&info->rec_cache)) - DBUG_RETURN(my_errno); if (info->opt_flag & READ_CACHE_USED) { /* Cache in use */ if (filepos == my_b_tell(&info->rec_cache) && @@ -256,12 +258,12 @@ int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, fast_ma_writeinfo(info); DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE); } - info->lastpos= filepos; - info->nextpos= filepos+share->base.pack_reclength; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+share->base.pack_reclength; if (! cache_read) /* No cacheing */ { - if ((error= _ma_read_static_record(info,filepos,buf))) + if ((error= _ma_read_static_record(info, buf, filepos))) { if (error > 0) error=my_errno=HA_ERR_RECORD_DELETED; diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c index 69d432a5d95..0f37391c1d4 100644 --- a/storage/maria/ma_test1.c +++ b/storage/maria/ma_test1.c @@ -28,12 +28,13 @@ static int rec_pointer_size=0, flags[50]; static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE; static int key_type=HA_KEYTYPE_NUM; static int create_flag=0; +static enum data_file_type record_type= DYNAMIC_RECORD; static uint insert_count, update_count, remove_count; static uint pack_keys=0, pack_seg=0, key_length; static uint unique_key=HA_NOSAME; static my_bool key_cacheing, null_fields, silent, skip_update, opt_unique, - verbose; + verbose, skip_delete; static MARIA_COLUMNDEF recinfo[4]; static MARIA_KEYDEF keyinfo[10]; static HA_KEYSEG keyseg[10]; @@ -63,31 +64,30 @@ static int run_test(const char *filename) MARIA_HA *file; int i,j,error,deleted,rec_length,uniques=0; ha_rows found,row_count; - my_off_t pos; char record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; MARIA_UNIQUEDEF uniquedef; MARIA_CREATE_INFO create_info; bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) &create_info,sizeof(create_info)); /* First define 2 columns */ - recinfo[0].type=FIELD_NORMAL; recinfo[0].length=1; /* For NULL bits */ - recinfo[1].type=key_field; - recinfo[1].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr : + create_info.null_bytes= 1; + recinfo[0].type= key_field; + recinfo[0].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr : key_length); if (key_field == FIELD_VARCHAR) - recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);; - recinfo[2].type=extra_field; - recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : 24); + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(key_length); + recinfo[1].type=extra_field; + recinfo[1].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : 24); if (extra_field == FIELD_VARCHAR) - recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length); + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(recinfo[1].length); if (opt_unique) { - recinfo[3].type=FIELD_CHECK; - recinfo[3].length=MARIA_UNIQUE_HASH_LENGTH; + recinfo[2].type=FIELD_CHECK; + recinfo[2].length=MARIA_UNIQUE_HASH_LENGTH; } - rec_length=recinfo[0].length+recinfo[1].length+recinfo[2].length+ - recinfo[3].length; + rec_length= recinfo[0].length+recinfo[1].length+recinfo[2].length; if (key_type == HA_KEYTYPE_VARTEXT1 && key_length > 255) @@ -125,8 +125,8 @@ static int run_test(const char *filename) for (i=0, start=1 ; i < 2 ; i++) { uniqueseg[i].start=start; - start+=recinfo[i+1].length; - uniqueseg[i].length=recinfo[i+1].length; + start+=recinfo[i].length; + uniqueseg[i].length=recinfo[i].length; uniqueseg[i].language= default_charset_info->number; } uniqueseg[0].type= key_type; @@ -139,18 +139,21 @@ static int run_test(const char *filename) uniqueseg[1].flag|= HA_BLOB_PART; } else if (extra_field == FIELD_VARCHAR) + { uniqueseg[1].flag|= HA_VAR_LENGTH_PART; + uniqueseg[1].type= (HA_VARCHAR_PACKLENGTH(recinfo[1].length-1) == 1 ? + HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2); + } } else uniques=0; if (!silent) - printf("- Creating isam-file\n"); - bzero((char*) &create_info,sizeof(create_info)); + printf("- Creating maria file\n"); create_info.max_rows=(ulong) (rec_pointer_size ? (1L << (rec_pointer_size*8))/40 : 0); - if (maria_create(filename,1,keyinfo,3+opt_unique,recinfo, + if (maria_create(filename, record_type, 1, keyinfo,2+opt_unique,recinfo, uniques, &uniquedef, &create_info, create_flag)) goto err; @@ -223,9 +226,10 @@ static int run_test(const char *filename) } /* Read through all rows and update them */ - pos=(my_off_t) 0; + assert(maria_scan_init(file) == 0); + found=0; - while ((error=maria_rrnd(file,read_record,pos)) == 0) + while ((error= maria_scan(file,read_record)) == 0) { if (update_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } memcpy(record,read_record,rec_length); @@ -236,17 +240,17 @@ static int run_test(const char *filename) keyinfo[0].seg[0].length,record+1,my_errno); } found++; - pos=HA_OFFSET_ERROR; } if (found != row_count) printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count); + maria_scan_end(file); } if (!silent) printf("- Reopening file\n"); if (maria_close(file)) goto err; if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err; - if (!skip_update) + if (!skip_delete) { if (!silent) printf("- Removing keys\n"); @@ -254,7 +258,13 @@ static int run_test(const char *filename) for (i=0 ; i <= 10 ; i++) { /* testing */ - if (remove_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + if (remove_count-- == 0) + { + fprintf(stderr, + "delete-rows number of rows deleted; Going down hard!\n"); + VOID(maria_close(file)); + exit(0) ; + } j=i*2; if (!flags[j]) continue; @@ -283,6 +293,7 @@ static int run_test(const char *filename) } if (!silent) printf("- Reading rows with key\n"); + record[1]= 0; /* For nicer printf */ for (i=0 ; i <= 25 ; i++) { create_key(key,i); @@ -299,10 +310,16 @@ static int run_test(const char *filename) if (!silent) printf("- Reading rows with position\n"); + if (maria_scan_init(file)) + { + fprintf(stderr, "maria_scan_init failed\n"); + goto err; + } + for (i=1,found=0 ; i <= 30 ; i++) { my_errno=0; - if ((error=maria_rrnd(file,read_record,i == 1 ? 0L : HA_OFFSET_ERROR)) == -1) + if ((error= maria_scan(file, read_record)) == HA_ERR_END_OF_FILE) { if (found != row_count-deleted) printf("Found only %ld of %ld rows\n", (ulong) found, @@ -318,7 +335,8 @@ static int run_test(const char *filename) i-1,error,my_errno,read_record+1); } } - if (maria_close(file)) goto err; + if (maria_close(file)) + goto err; maria_end(); my_end(MY_CHECK_ERROR); @@ -346,7 +364,7 @@ static void create_key_part(char *key,uint rownr) if ((rownr & 7) == 0) { /* Change the key to force a unpack of the next key */ - bfill(key+3,keyinfo[0].seg[0].length-4,rownr < 10 ? 'a' : 'b'); + bfill(key+3,keyinfo[0].seg[0].length-5,rownr < 10 ? 'a' : 'b'); } } else @@ -375,7 +393,7 @@ static void create_key(char *key,uint rownr) if (rownr == 0) { key[0]=1; /* null key */ - key[1]=0; /* Fore easy print of key */ + key[1]=0; /* For easy print of key */ return; } *key++=0; @@ -405,7 +423,7 @@ static void create_record(char *record,uint rownr) record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */ pos=record+1; - if (recinfo[1].type == FIELD_BLOB) + if (recinfo[0].type == FIELD_BLOB) { uint tmp; char *ptr; @@ -414,25 +432,25 @@ static void create_record(char *record,uint rownr) int4store(pos,tmp); ptr=blob_key; memcpy_fixed(pos+4,&ptr,sizeof(char*)); - pos+=recinfo[1].length; + pos+=recinfo[0].length; } - else if (recinfo[1].type == FIELD_VARCHAR) + else if (recinfo[0].type == FIELD_VARCHAR) { - uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); create_key_part(pos+pack_length,rownr); tmp= strlen(pos+pack_length); if (pack_length == 1) *(uchar*) pos= (uchar) tmp; else int2store(pos,tmp); - pos+= recinfo[1].length; + pos+= recinfo[0].length; } else { create_key_part(pos,rownr); - pos+=recinfo[1].length; + pos+=recinfo[0].length; } - if (recinfo[2].type == FIELD_BLOB) + if (recinfo[1].type == FIELD_BLOB) { uint tmp; char *ptr;; @@ -443,7 +461,7 @@ static void create_record(char *record,uint rownr) ptr=blob_record; memcpy_fixed(pos+4,&ptr,sizeof(char*)); } - else if (recinfo[2].type == FIELD_VARCHAR) + else if (recinfo[1].type == FIELD_VARCHAR) { uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); sprintf(pos+pack_length, "... row: %d", rownr); @@ -456,7 +474,7 @@ static void create_record(char *record,uint rownr) else { sprintf(pos,"... row: %d", rownr); - strappend(pos,recinfo[2].length,' '); + strappend(pos,recinfo[1].length,' '); } } @@ -465,7 +483,7 @@ static void create_record(char *record,uint rownr) static void update_record(char *record) { char *pos=record+1; - if (recinfo[1].type == FIELD_BLOB) + if (recinfo[0].type == FIELD_BLOB) { char *column,*ptr; int length; @@ -477,16 +495,16 @@ static void update_record(char *record) if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) default_charset_info->cset->casedn(default_charset_info, blob_key, length, blob_key, length); - pos+=recinfo[1].length; + pos+=recinfo[0].length; } - else if (recinfo[1].type == FIELD_VARCHAR) + else if (recinfo[0].type == FIELD_VARCHAR) { - uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); default_charset_info->cset->casedn(default_charset_info, pos + pack_length, length, pos + pack_length, length); - pos+=recinfo[1].length; + pos+=recinfo[0].length; } else { @@ -494,10 +512,10 @@ static void update_record(char *record) default_charset_info->cset->casedn(default_charset_info, pos, keyinfo[0].seg[0].length, pos, keyinfo[0].seg[0].length); - pos+=recinfo[1].length; + pos+=recinfo[0].length; } - if (recinfo[2].type == FIELD_BLOB) + if (recinfo[1].type == FIELD_BLOB) { char *column; int length; @@ -510,13 +528,14 @@ static void update_record(char *record) column=blob_record; memcpy_fixed(pos+4,&column,sizeof(char*)); } - else if (recinfo[2].type == FIELD_VARCHAR) + else if (recinfo[1].type == FIELD_VARCHAR) { /* Second field is longer than 10 characters */ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); - bfill(pos+pack_length+length,recinfo[2].length-length-pack_length,'.'); - length=recinfo[2].length-pack_length; + pos= record+ recinfo[1].offset; + bfill(pos+pack_length+length,recinfo[1].length-length-pack_length,'.'); + length=recinfo[1].length-pack_length; if (pack_length == 1) *(uchar*) pos= (uchar) length; else @@ -524,7 +543,7 @@ static void update_record(char *record) } else { - bfill(pos+recinfo[2].length-10,10,'.'); + bfill(pos+recinfo[1].length-10,10,'.'); } } @@ -537,44 +556,49 @@ static struct my_option my_long_options[] = {"debug", '#', "Undocumented", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #endif - {"delete_rows", 'd', "Undocumented", (gptr*) &remove_count, - (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"delete-rows", 'd', "Abort after this many rows has been deleted", + (gptr*) &remove_count, (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG, + 1000, 0, 0, 0, 0, 0}, {"help", '?', "Display help and exit", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"insert_rows", 'i', "Undocumented", (gptr*) &insert_count, + {"insert-rows", 'i', "Undocumented", (gptr*) &insert_count, (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, - {"key_alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", + {"key-alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_binary_pack", 'B', "Undocumented", + {"key-binary-pack", 'B', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_blob", 'b', "Undocumented", + {"key-blob", 'b', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_cache", 'K', "Undocumented", (gptr*) &key_cacheing, + {"key-cache", 'K', "Undocumented", (gptr*) &key_cacheing, (gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, + {"key-length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0}, - {"key_multiple", 'm', "Undocumented", + {"key-multiple", 'm', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_prefix_pack", 'P', "Undocumented", + {"key-prefix_pack", 'P', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_space_pack", 'p', "Undocumented", + {"key-space_pack", 'p', "Undocumented", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"key_varchar", 'w', "Test VARCHAR keys", + {"key-varchar", 'w', "Test VARCHAR keys", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"null_fields", 'N', "Define fields with NULL", + {"null-fields", 'N', "Define fields with NULL", (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"row_fixed_size", 'S', "Undocumented", + {"row-fixed-size", 'S', "Fixed size records", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"row_pointer_size", 'R', "Undocumented", (gptr*) &rec_pointer_size, + {"rows-in-block", 'M', "Store rows in block format", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"row-pointer-size", 'R', "Undocumented", (gptr*) &rec_pointer_size, (gptr*) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"silent", 's', "Undocumented", (gptr*) &silent, (gptr*) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"skip_update", 'U', "Undocumented", (gptr*) &skip_update, + {"skip-delete", 'U', "Don't test deletes", (gptr*) &skip_delete, + (gptr*) &skip_delete, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-update", 'D', "Don't test updates", (gptr*) &skip_update, (gptr*) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"unique", 'C', "Undocumented", (gptr*) &opt_unique, (gptr*) &opt_unique, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, - {"update_rows", 'u', "Undocumented", (gptr*) &update_count, + {"update-rows", 'u', "Undocumented", (gptr*) &update_count, (gptr*) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, {"verbose", 'v', "Be more verbose", (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -605,15 +629,20 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), case 'B': pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */ break; + case 'M': + record_type= BLOCK_RECORD; + break; case 'S': if (key_field == FIELD_VARCHAR) { create_flag=0; /* Static sized varchar */ + record_type= STATIC_RECORD; } else if (key_field != FIELD_BLOB) { key_field=FIELD_NORMAL; /* static-size record */ extra_field=FIELD_NORMAL; + record_type= STATIC_RECORD; } break; case 'p': @@ -629,6 +658,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), extra_field= FIELD_BLOB; pack_seg|= HA_BLOB_PART; key_type= HA_KEYTYPE_VARTEXT1; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; break; case 'k': if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) @@ -642,7 +673,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), extra_field= FIELD_VARCHAR; key_type= HA_KEYTYPE_VARTEXT1; pack_seg|= HA_VAR_LENGTH_PART; - create_flag|= HA_PACK_RECORD; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; break; case 'K': /* Use key cacheing */ key_cacheing=1; diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index 840ecb2eeb7..6d54a078f25 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -38,7 +38,8 @@ static void get_options(int argc, char *argv[]); static uint rnd(uint max_value); static void fix_length(byte *record,uint length); -static void put_blob_in_record(char *blob_pos,char **blob_buffer); +static void put_blob_in_record(char *blob_pos,char **blob_buffer, + ulong *length); static void copy_key(struct st_maria_info *info,uint inx, uchar *record,uchar *key); @@ -46,10 +47,11 @@ static int verbose=0,testflag=0, first_key=0,async_io=0,key_cacheing=0,write_cacheing=0,locking=0, rec_pointer_size=0,pack_fields=1,silent=0, opt_quick_mode=0; -static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1, - create_flag=0; +static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1; +static int create_flag= 0, srand_arg= 0; static ulong key_cache_size=IO_SIZE*16; static uint key_cache_block_size= KEY_CACHE_BLOCK_SIZE; +static enum data_file_type record_type= DYNAMIC_RECORD; static uint keys=MARIA_KEYS,recant=1000; static uint use_blob=0; @@ -201,16 +203,14 @@ int main(int argc, char *argv[]) for (i=4999 ; i>0 ; i--) key3[i]=0; if (!silent) - printf("- Creating isam-file\n"); - /* DBUG_PUSH(""); */ - /* my_delete(filename,MYF(0)); */ /* Remove old locks under gdb */ + printf("- Creating maria-file\n"); file= 0; bzero((char*) &create_info,sizeof(create_info)); create_info.max_rows=(ha_rows) (rec_pointer_size ? (1L << (rec_pointer_size*8))/ reclength : 0); create_info.reloc_rows=(ha_rows) 100; - if (maria_create(filename,keys,&keyinfo[first_key], + if (maria_create(filename, record_type, keys,&keyinfo[first_key], use_blob ? 7 : 6, &recinfo[0], 0,(MARIA_UNIQUEDEF*) 0, &create_info,create_flag)) @@ -230,12 +230,13 @@ int main(int argc, char *argv[]) for (i=0 ; i < recant ; i++) { + ulong blob_length; n1=rnd(1000); n2=rnd(100); n3=rnd(5000); sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); int4store(record+STANDARD_LENGTH-4,(long) i); fix_length(record,(uint) STANDARD_LENGTH+rnd(60)); - put_blob_in_record(record+blob_pos,&blob_buffer); - DBUG_PRINT("test",("record: %d",i)); + put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length); + DBUG_PRINT("test",("record: %d blob_length: %lu", i, blob_length)); if (maria_write(file,record)) { @@ -257,7 +258,7 @@ int main(int argc, char *argv[]) } /* Check if we can find key without flushing database */ - if (i == recant/2) + if (i % 10 == 0) { for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; if (!j) @@ -270,7 +271,8 @@ int main(int argc, char *argv[]) } } } - if (testflag==1) goto end; + if (testflag == 1) + goto end; if (write_cacheing) { @@ -285,6 +287,8 @@ int main(int argc, char *argv[]) if (!silent) printf("- Delete\n"); + if (srand_arg) + srand(srand_arg); for (i=0 ; i0 && key1[j] == 0 ; j--) ; @@ -296,6 +300,12 @@ int main(int argc, char *argv[]) printf("can't find key1: \"%s\"\n",key); goto err; } + if (bcmp(read_record+keyinfo[0].seg[0].start, + key, keyinfo[0].seg[0].length)) + { + printf("Found wrong record when searching for key: \"%s\"\n",key); + goto err; + } if (opt_delete == (uint) remove_count) /* While testing */ goto end; if (maria_delete(file,read_record)) @@ -310,10 +320,13 @@ int main(int argc, char *argv[]) else puts("Warning: Skipping delete test because no dupplicate keys"); } - if (testflag==2) goto end; + if (testflag == 2) + goto end; if (!silent) printf("- Update\n"); + if (srand_arg) + srand(srand_arg); for (i=0 ; i 0 ;) @@ -602,7 +637,7 @@ int main(int argc, char *argv[]) if (maria_rsame(file,read_record2,(int) i)) goto err; if (bcmp(read_record,read_record2,reclength) != 0) { - printf("is_rsame didn't find same record\n"); + printf("maria_rsame didn't find same record\n"); goto end; } } @@ -716,12 +751,14 @@ int main(int argc, char *argv[]) } } ant=0; - while ((error=maria_rrnd(file,record,HA_OFFSET_ERROR)) != HA_ERR_END_OF_FILE && + assert(maria_scan_init(file) == 0); + while ((error= maria_scan(file,record)) != HA_ERR_END_OF_FILE && ant < write_count + 10) - ant+= error ? 0 : 1; + ant+= error ? 0 : 1; + maria_scan_end(file); if (ant != write_count-opt_delete) { - printf("rrnd with cache: I can only find: %d records of %d\n", + printf("scan with cache: I can only find: %d records of %d\n", ant,write_count-opt_delete); goto end; } @@ -743,7 +780,8 @@ int main(int argc, char *argv[]) goto end; } - if (testflag == 4) goto end; + if (testflag == 4) + goto end; if (!silent) printf("- Removing keys\n"); @@ -752,8 +790,8 @@ int main(int argc, char *argv[]) /* DBUG_POP(); */ maria_reset(file); found_parts=0; - while ((error=maria_rrnd(file,read_record,HA_OFFSET_ERROR)) != - HA_ERR_END_OF_FILE) + maria_scan_init(file); + while ((error= maria_scan(file,read_record)) != HA_ERR_END_OF_FILE) { info.recpos=maria_position(file); if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR) @@ -767,7 +805,7 @@ int main(int argc, char *argv[]) { if (opt_delete == (uint) remove_count) /* While testing */ goto end; - if (maria_rsame(file,read_record,-1)) + if (rnd(2) == 1 && maria_rsame(file,read_record,-1)) { printf("can't find record %lx\n",(long) info.recpos); goto err; @@ -783,9 +821,10 @@ int main(int argc, char *argv[]) { if (ptr[pos] != (uchar) (blob_length+pos)) { - printf("found blob with wrong info at %ld\n",(long) lastpos); - use_blob=0; - break; + printf("Found blob with wrong info at %ld\n",(long) lastpos); + maria_scan_end(file); + my_errno= 0; + goto err; } } } @@ -793,6 +832,7 @@ int main(int argc, char *argv[]) { printf("can't delete record: %6.6s, delete_count: %d\n", read_record, opt_delete); + maria_scan_end(file); goto err; } opt_delete++; @@ -800,6 +840,7 @@ int main(int argc, char *argv[]) else found_parts++; } + maria_scan_end(file); if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED) printf("error: %d from maria_rrnd\n",my_errno); if (write_count != opt_delete) @@ -851,8 +892,7 @@ reads: %10lu\n", (ulong) maria_key_cache->global_cache_read); } end_key_cache(maria_key_cache,1); - if (blob_buffer) - my_free(blob_buffer,MYF(0)); + my_free(blob_buffer, MYF(MY_ALLOW_ZERO_PTR)); my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO); return(0); err: @@ -864,8 +904,7 @@ err: } /* main */ - /* l{ser optioner */ - /* OBS! intierar endast DEBUG - ingen debuggning h{r ! */ +/* Read options */ static void get_options(int argc, char **argv) { @@ -879,7 +918,9 @@ static void get_options(int argc, char **argv) pack_type= HA_BINARY_PACK_KEY; break; case 'b': - use_blob=1; + use_blob= 1; + if (*++pos) + use_blob= atol(pos); break; case 'K': /* Use key cacheing */ key_cacheing=1; @@ -896,7 +937,7 @@ static void get_options(int argc, char **argv) break; case 'i': if (*++pos) - srand(atoi(pos)); + srand(srand_arg= atoi(pos)); break; case 'L': locking=1; @@ -910,9 +951,9 @@ static void get_options(int argc, char **argv) verbose=1; break; case 'm': /* records */ - if ((recant=atoi(++pos)) < 10) + if ((recant=atoi(++pos)) < 10 && testflag > 1) { - fprintf(stderr,"record count must be >= 10\n"); + fprintf(stderr,"record count must be >= 10 (if testflag != 1)\n"); exit(1); } break; @@ -943,6 +984,9 @@ static void get_options(int argc, char **argv) keys > (uint) (MARIA_KEYS-first_key)) keys=MARIA_KEYS-first_key; break; + case 'M': + record_type= BLOCK_RECORD; + break; case 'P': pack_type=0; /* Don't use DIFF_LENGTH */ pack_seg=0; @@ -954,6 +998,7 @@ static void get_options(int argc, char **argv) break; case 'S': pack_fields=0; /* Static-length-records */ + record_type= STATIC_RECORD; break; case 's': silent=1; @@ -973,7 +1018,7 @@ static void get_options(int argc, char **argv) case '?': case 'I': case 'V': - printf("%s Ver 1.2 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); puts("By Monty, for your professional use\n"); printf("Usage: %s [-?AbBcDIKLPRqSsVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n", progname); @@ -1010,7 +1055,8 @@ static void fix_length(byte *rec, uint length) /* Put maybe a blob in record */ -static void put_blob_in_record(char *blob_pos, char **blob_buffer) +static void put_blob_in_record(char *blob_pos, char **blob_buffer, + ulong *blob_length) { ulong i,length; if (use_blob) @@ -1028,10 +1074,12 @@ static void put_blob_in_record(char *blob_pos, char **blob_buffer) (*blob_buffer)[i]=(char) (length+i); int4store(blob_pos,length); memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*)); + *blob_length= length; } else { int4store(blob_pos,0); + *blob_length= 0; } } return; diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c index 96b896b03c6..4911d13f2f1 100644 --- a/storage/maria/ma_test3.c +++ b/storage/maria/ma_test3.c @@ -97,8 +97,8 @@ int main(int argc,char **argv) puts("- Creating maria-file"); my_delete(filename,MYF(0)); /* Remove old locks under gdb */ - if (maria_create(filename,2,&keyinfo[0],2,&recinfo[0],0,(MARIA_UNIQUEDEF*) 0, - (MARIA_CREATE_INFO*) 0,0)) + if (maria_create(filename,BLOCK_RECORD, 2, &keyinfo[0],2,&recinfo[0],0, + (MARIA_UNIQUEDEF*) 0, (MARIA_CREATE_INFO*) 0,0)) exit(1); rnd_init(0); diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index f857127dca9..17e654ac51f 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -5,143 +5,188 @@ valgrind="valgrind --alignment=8 --leak-check=yes" silent="-s" +suffix="" +#set -x -v -e -if test -f ma_test1$MACH ; then suffix=$MACH ; else suffix=""; fi -./ma_test1$suffix $silent -./maria_chk$suffix -se test1 -./ma_test1$suffix $silent -N -S -./maria_chk$suffix -se test1 -./ma_test1$suffix $silent -P --checksum -./maria_chk$suffix -se test1 -./ma_test1$suffix $silent -P -N -S -./maria_chk$suffix -se test1 -./ma_test1$suffix $silent -B -N -R2 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -k 480 --unique -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -N -S -R1 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -S -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -S -N --unique -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -S -N --key_length=127 --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -S -N --key_length=128 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -S --key_length=480 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -B -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -B --key_length=64 --unique -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -B -k 480 --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -m -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -m -P --unique --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -m -p -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -w -S --unique -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -w --key_length=64 --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -w -N --key_length=480 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -w -S --key_length=480 --checksum -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -b -N -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -a -b --key_length=480 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent -p -B --key_length=480 -./maria_chk$suffix -sm test1 +run_tests() +{ + row_type=$1 + # + # First some simple tests + # + ./ma_test1$suffix $silent $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -N $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -P --checksum $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -P -N $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -B -N -R2 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -k 480 --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -N -R1 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --key_length=127 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --key_length=128 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B --key_length=64 --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B -k 480 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m -P --unique --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -m -p $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -w --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w --key_length=64 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w -N --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w --key_length=480 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -b -N $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -b --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -B --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --checksum --unique $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent --unique $row_type + ./maria_chk$suffix -se test1 + + ./ma_test1$suffix $silent --key_multiple -N -S $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -a -p --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -a -B --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -P -S $row_type + ./maria_chk$suffix -sm test1 + + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -ess test1 + + ./ma_test2$suffix $silent -L -K -W -P $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -K -W -P -A $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -K -P -R3 -m50 -b1000000 $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -B $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -D -B -c $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e4096 -K $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e8192 -K $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type + ./maria_chk$suffix -sm test2 +} -./ma_test1$suffix $silent --checksum -./maria_chk$suffix -se test1 -./maria_chk$suffix -rs test1 -./maria_chk$suffix -se test1 -./maria_chk$suffix -rqs test1 -./maria_chk$suffix -se test1 -./maria_chk$suffix -rs --correct-checksum test1 -./maria_chk$suffix -se test1 -./maria_chk$suffix -rqs --correct-checksum test1 -./maria_chk$suffix -se test1 -./maria_chk$suffix -ros --correct-checksum test1 -./maria_chk$suffix -se test1 -./maria_chk$suffix -rqos --correct-checksum test1 -./maria_chk$suffix -se test1 +run_repair_tests() +{ + row_type=$1 + ./ma_test1$suffix $silent --checksum $row_type + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rs test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rs --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqs --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -ros --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqos --correct-checksum test1 + ./maria_chk$suffix -se test1 +} -# check of maria_pack / maria_chk -./maria_pack$suffix --force -s test1 -./maria_chk$suffix -es test1 -./maria_chk$suffix -rqs test1 -./maria_chk$suffix -es test1 -./maria_chk$suffix -rs test1 -./maria_chk$suffix -es test1 -./maria_chk$suffix -rus test1 -./maria_chk$suffix -es test1 +run_pack_tests() +{ + row_type=$1 + # check of maria_pack / maria_chk + ./ma_test1$suffix $silent --checksum $row_type + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -ess test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rus test1 + ./maria_chk$suffix -es test1 + + ./ma_test1$suffix $silent --checksum -S $row_type + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -ros test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -se test1 + + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rus test1 + ./maria_chk$suffix -es test1 +} -./ma_test1$suffix $silent --checksum -S -./maria_chk$suffix -se test1 -./maria_chk$suffix -ros test1 -./maria_chk$suffix -rqs test1 -./maria_chk$suffix -se test1 +echo "Running tests with dynamic row format" +run_tests "" +run_repair_tests "" +run_pack_tests "" -./maria_pack$suffix --force -s test1 -./maria_chk$suffix -rqs test1 -./maria_chk$suffix -es test1 -./maria_chk$suffix -rus test1 -./maria_chk$suffix -es test1 +echo "Running tests with static row format" +run_tests -S +run_repair_tests -S +run_pack_tests -S -./ma_test1$suffix $silent --checksum --unique -./maria_chk$suffix -se test1 -./ma_test1$suffix $silent --unique -S -./maria_chk$suffix -se test1 +echo "Running tests with block row format" +run_tests -M +# +# Tests that gives warnings +# -./ma_test1$suffix $silent --key_multiple -N -S -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent --key_multiple -a -p --key_length=480 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent --key_multiple -a -B --key_length=480 -./maria_chk$suffix -sm test1 -./ma_test1$suffix $silent --key_multiple -P -S -./maria_chk$suffix -sm test1 - -./ma_test2$suffix $silent -L -K -W -P -./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -L -K -W -P -A -./maria_chk$suffix -sm test2 ./ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 +./maria_chk$suffix -sm test2 echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" -./maria_chk$suffix -sm test2 ./ma_test2$suffix $silent -L -K -R1 -m2000 +echo "./maria_chk$suffix -sm test2 will warn that 'Datafile is almost full'" ./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -L -K -P -S -R3 -m50 -b1000000 -./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -L -B -./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -D -B -c -./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -m10000 -e8192 -K -./maria_chk$suffix -sm test2 -./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L -./maria_chk$suffix -sm test2 +./maria_chk$suffix -ssm test2 -./ma_test2$suffix $silent -L -K -W -P -m50 -l -./maria_log$suffix -./ma_test2$suffix $silent -L -K -W -P -m50 -l -b100 -./maria_log$suffix +# +# Some timing tests +# time ./ma_test2$suffix $silent +time ./ma_test2$suffix $silent -S +time ./ma_test2$suffix $silent -M +time ./ma_test2$suffix $silent -B +time ./ma_test2$suffix $silent -L +time ./ma_test2$suffix $silent -K time ./ma_test2$suffix $silent -K -B time ./ma_test2$suffix $silent -L -B time ./ma_test2$suffix $silent -L -K -B time ./ma_test2$suffix $silent -L -K -W -B -time ./ma_test2$suffix $silent -L -K -W -S -B -time ./ma_test2$suffix $silent -D -K -W -S -B +time ./ma_test2$suffix $silent -L -K -W -B -S +time ./ma_test2$suffix $silent -L -K -W -B -M +time ./ma_test2$suffix $silent -D -K -W -B -S diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index bc1aa71966b..8348bfbd84b 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -22,10 +22,11 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, ha_checksum unique_hash, my_off_t disk_pos) { - my_off_t lastpos=info->lastpos; + my_off_t lastpos=info->cur_row.lastpos; MARIA_KEYDEF *key= &info->s->keyinfo[def->key]; - uchar *key_buff=info->lastkey2; + byte *key_buff= info->lastkey2; DBUG_ENTER("_ma_check_unique"); + DBUG_PRINT("enter",("unique_hash: %lu", unique_hash)); maria_unique_store(record+key->seg->start, unique_hash); _ma_make_key(info,def->key,key_buff,record,0); @@ -33,24 +34,25 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, /* The above changed info->lastkey2. Inform maria_rnext_same(). */ info->update&= ~HA_STATE_RNEXT_SAME; - if (_ma_search(info,info->s->keyinfo+def->key,key_buff,MARIA_UNIQUE_HASH_LENGTH, + if (_ma_search(info,info->s->keyinfo+def->key,key_buff, + MARIA_UNIQUE_HASH_LENGTH, SEARCH_FIND,info->s->state.key_root[def->key])) { info->page_changed=1; /* Can't optimize read next */ - info->lastpos= lastpos; + info->cur_row.lastpos= lastpos; DBUG_RETURN(0); /* No matching rows */ } for (;;) { - if (info->lastpos != disk_pos && - !(*info->s->compare_unique)(info,def,record,info->lastpos)) + if (info->cur_row.lastpos != disk_pos && + !(*info->s->compare_unique)(info,def,record,info->cur_row.lastpos)) { my_errno=HA_ERR_FOUND_DUPP_UNIQUE; info->errkey= (int) def->key; - info->dupp_key_pos= info->lastpos; - info->page_changed=1; /* Can't optimize read next */ - info->lastpos=lastpos; + info->dup_key_pos= info->cur_row.lastpos; + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; DBUG_PRINT("info",("Found duplicate")); DBUG_RETURN(1); /* Found identical */ } @@ -60,8 +62,8 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, memcmp((char*) info->lastkey, (char*) key_buff, MARIA_UNIQUE_HASH_LENGTH)) { - info->page_changed=1; /* Can't optimize read next */ - info->lastpos=lastpos; + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; DBUG_RETURN(0); /* end of tree */ } } @@ -144,11 +146,11 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *record) RETURN 0 if both rows have equal unique value - # Rows are different + 1 Rows are different */ -int _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, - my_bool null_are_equal) +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal) { const byte *pos_a, *pos_b, *end; HA_KEYSEG *keyseg; diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 248b17ce2c9..c86ebd16c6f 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -24,7 +24,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) int flag,key_changed,save_errno; reg3 my_off_t pos; uint i; - uchar old_key[HA_MAX_KEY_BUFF],*new_key; + byte old_key[HA_MAX_KEY_BUFF],*new_key; bool auto_key_changed=0; ulonglong changed; MARIA_SHARE *share=info->s; @@ -49,18 +49,26 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) { DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL); } - pos=info->lastpos; + pos= info->cur_row.lastpos; if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); - if (share->calc_checksum) - old_checksum=info->checksum=(*share->calc_checksum)(info,oldrec); if ((*share->compare_record)(info,oldrec)) { - save_errno=my_errno; + save_errno= my_errno; + DBUG_PRINT("warning", ("Got error from compare record")); goto err_end; /* Record has changed */ } + if (share->calc_checksum) + { + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + if (info->s->calc_checksum) + old_checksum= (*info->s->calc_checksum)(info, oldrec); + } /* Calculate and check all unique constraints */ key_changed=0; @@ -69,7 +77,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) MARIA_UNIQUEDEF *def=share->uniqueinfo+i; if (_ma_unique_comp(def, newrec, oldrec,1) && _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec), - info->lastpos)) + pos)) { save_errno=my_errno; goto err_end; @@ -83,7 +91,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) /* Check which keys changed from the original row */ - new_key=info->lastkey2; + new_key= info->lastkey2; changed=0; for (i=0 ; i < share->base.keys ; i++) { @@ -116,7 +124,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) info->update&= ~HA_STATE_RNEXT_SAME; if (new_length != old_length || - memcmp((byte*) old_key,(byte*) new_key,new_length)) + memcmp(old_key, new_key, new_length)) { if ((int) i == info->lastinx) key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */ @@ -139,7 +147,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) if (share->calc_checksum) { - info->checksum=(*share->calc_checksum)(info,newrec); + info->cur_row.checksum= (*share->calc_checksum)(info,newrec); /* Store new checksum in index file header */ key_changed|= HA_STATE_CHANGED; } @@ -167,10 +175,13 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) set_if_bigger(info->s->state.auto_increment, ma_retrieve_auto_increment(info, newrec)); if (share->calc_checksum) - info->state->checksum+=(info->checksum - old_checksum); + info->state->checksum+= (info->cur_row.checksum - old_checksum); - info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | HA_STATE_AKTIV | - key_changed); + /* + We can't yet have HA_STATE_ACTIVE here, as block_record dosn't support + it + */ + info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | key_changed); VOID(_ma_writeinfo(info,key_changed ? WRITEINFO_UPDATE_KEYFILE : 0)); allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) @@ -184,8 +195,6 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) err: DBUG_PRINT("error",("key: %d errno: %d",i,my_errno)); save_errno=my_errno; - if (changed) - key_changed|= HA_STATE_CHANGED; if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL) { info->errkey= (int) i; diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 24768b36c89..a4fe5506c8e 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -24,33 +24,52 @@ /* Functions declared in this file */ static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo, - uint comp_flag, uchar *key, - uint key_length, my_off_t pos, uchar *father_buff, - uchar *father_keypos, my_off_t father_page, + uint comp_flag, byte *key, + uint key_length, my_off_t pos, byte *father_buff, + byte *father_keypos, my_off_t father_page, my_bool insert_last); -static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key, - uchar *curr_buff,uchar *father_buff, - uchar *father_keypos,my_off_t father_page); -static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint *return_key_length, - uchar **after_key); -int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,uchar *key, +static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key, + byte *curr_buff,byte *father_buff, + byte *father_keypos,my_off_t father_page); +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key); +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,byte *key, uint key_length); -int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,uchar *key, +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,byte *key, uint key_length); - /* Write new record to database */ + +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, + const byte *record + __attribute__((unused))) +{ + return ((info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) ? + info->s->state.dellink : + info->state->data_file_length); +} + +my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + + +/* Write new record to a table */ int maria_write(MARIA_HA *info, byte *record) { MARIA_SHARE *share=info->s; uint i; int save_errno; - my_off_t filepos; - uchar *buff; + MARIA_RECORD_POS filepos; + byte *buff; my_bool lock_tree= share->concurrent_insert; + my_bool fatal_error; DBUG_ENTER("maria_write"); - DBUG_PRINT("enter",("isam: %d data: %d",info->s->kfile,info->dfile)); + DBUG_PRINT("enter",("index_file: %d data_file: %d", + info->s->kfile,info->dfile)); DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", maria_print_error(info->s, HA_ERR_CRASHED); @@ -62,10 +81,6 @@ int maria_write(MARIA_HA *info, byte *record) if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); dont_break(); /* Dont allow SIGHUP or SIGINT */ - filepos= ((share->state.dellink != HA_OFFSET_ERROR && - !info->append_insert_at_end) ? - share->state.dellink : - info->state->data_file_length); if (share->base.reloc == (ha_rows) 1 && share->base.records == (ha_rows) 1 && @@ -86,14 +101,26 @@ int maria_write(MARIA_HA *info, byte *record) for (i=0 ; i < share->state.header.uniques ; i++) { if (_ma_check_unique(info,share->uniqueinfo+i,record, - _ma_unique_hash(share->uniqueinfo+i,record), - HA_OFFSET_ERROR)) + _ma_unique_hash(share->uniqueinfo+i,record), + HA_OFFSET_ERROR)) goto err2; } - /* Write all keys to indextree */ + if ((info->opt_flag & OPT_NO_ROWS)) + filepos= HA_OFFSET_ERROR; + else + { + /* + This may either calculate a record or, or write the record and return + the record id + */ + if ((filepos= (*share->write_record_init)(info, record)) == + HA_OFFSET_ERROR) + goto err2; + } - buff=info->lastkey2; + /* Write all keys to indextree */ + buff= info->lastkey2; for (i=0 ; i < share->base.keys ; i++) { if (maria_is_key_active(share->state.key_map, i)) @@ -136,13 +163,13 @@ int maria_write(MARIA_HA *info, byte *record) rw_unlock(&share->key_root_lock[i]); } } - if (share->calc_checksum) - info->checksum=(*share->calc_checksum)(info,record); - if (!(info->opt_flag & OPT_NO_ROWS)) + if (share->calc_write_checksum) + info->cur_row.checksum= (*share->calc_write_checksum)(info,record); + if (filepos != HA_OFFSET_ERROR) { if ((*share->write_record)(info,record)) goto err; - info->state->checksum+=info->checksum; + info->state->checksum+= info->cur_row.checksum; } if (share->base.auto_key) set_if_bigger(info->s->state.auto_increment, @@ -150,7 +177,7 @@ int maria_write(MARIA_HA *info, byte *record) info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); info->state->records++; - info->lastpos=filepos; + info->cur_row.lastpos= filepos; VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); if (info->invalidator != 0) { @@ -162,8 +189,10 @@ int maria_write(MARIA_HA *info, byte *record) DBUG_RETURN(0); err: - save_errno=my_errno; - if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL || + save_errno= my_errno; + fatal_error= 0; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || + my_errno == HA_ERR_RECORD_FILE_FULL || my_errno == HA_ERR_NULL_IN_SPATIAL) { if (info->bulk_insert) @@ -207,14 +236,21 @@ err: } } else + fatal_error= 1; + + if ((*share->write_record_abort)(info)) + fatal_error= 1; + if (fatal_error) { maria_print_error(info->s, HA_ERR_CRASHED); maria_mark_crashed(info); } + info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); my_errno=save_errno; err2: save_errno=my_errno; + DBUG_PRINT("error", ("got error: %d", save_errno)); VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); @@ -223,7 +259,7 @@ err2: /* Write one key to btree */ -int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, uint key_length) +int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, uint key_length) { DBUG_ENTER("_ma_ck_write"); @@ -242,7 +278,7 @@ int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, uint key_length) * Normal insert code * **********************************************************************/ -int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key, +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, byte *key, uint key_length) { int error; @@ -275,15 +311,17 @@ int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key, DBUG_RETURN(error); } /* _ma_ck_write_btree */ + int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, my_off_t *root, uint comp_flag) + byte *key, uint key_length, my_off_t *root, + uint comp_flag) { int error; DBUG_ENTER("_ma_ck_real_write_btree"); /* key_length parameter is used only if comp_flag is SEARCH_FIND */ if (*root == HA_OFFSET_ERROR || (error=w_search(info, keyinfo, comp_flag, key, key_length, - *root, (uchar *) 0, (uchar*) 0, + *root, (byte*) 0, (byte*) 0, (my_off_t) 0, 1)) > 0) error= _ma_enlarge_root(info,keyinfo,key,root); DBUG_RETURN(error); @@ -292,7 +330,7 @@ int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Make a new root with key as only pointer */ -int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, +int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, my_off_t *root) { uint t_length,nod_flag; @@ -302,11 +340,11 @@ int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0; _ma_kpointer(info,info->buff+2,*root); /* if nod */ - t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar*) 0, - (uchar*) 0, (uchar*) 0, key,&s_temp); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte*) 0, + (byte*) 0, (byte*) 0, key,&s_temp); maria_putint(info->buff,t_length+2+nod_flag,nod_flag); (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp); - info->buff_used=info->page_changed=1; /* info->buff is used */ + info->keybuff_used=info->page_changed=1; /* info->buff is used */ if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff)) DBUG_RETURN(-1); @@ -322,21 +360,21 @@ int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, */ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uint comp_flag, uchar *key, uint key_length, my_off_t page, - uchar *father_buff, uchar *father_keypos, + uint comp_flag, byte *key, uint key_length, my_off_t page, + byte *father_buff, byte *father_keypos, my_off_t father_page, my_bool insert_last) { int error,flag; uint nod_flag, search_key_length; - uchar *temp_buff,*keypos; - uchar keybuff[HA_MAX_KEY_BUFF]; + byte *temp_buff,*keypos; + byte keybuff[HA_MAX_KEY_BUFF]; my_bool was_last_key; - my_off_t next_page, dupp_key_pos; + my_off_t next_page, dup_key_pos; DBUG_ENTER("w_search"); DBUG_PRINT("enter",("page: %ld",page)); search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; - if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length+ HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) @@ -351,9 +389,9 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, /* get position to record with duplicated key */ tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff); if (tmp_key_length) - dupp_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length); + dup_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length); else - dupp_key_pos= HA_OFFSET_ERROR; + dup_key_pos= HA_OFFSET_ERROR; if (keyinfo->flag & HA_FULLTEXT) { @@ -373,7 +411,7 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, else { /* popular word. two-level tree. going down */ - my_off_t root=dupp_key_pos; + my_off_t root=dup_key_pos; keyinfo=&info->s->ft2_keyinfo; get_key_full_length_rdonly(off, key); key+=off; @@ -392,7 +430,7 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, } else /* not HA_FULLTEXT, normal HA_NOSAME key */ { - info->dupp_key_pos= dupp_key_pos; + info->dup_key_pos= dup_key_pos; my_afree((byte*) temp_buff); my_errno=HA_ERR_FOUND_DUPP_KEY; DBUG_RETURN(-1); @@ -447,25 +485,25 @@ err: */ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff, - uchar *father_buff, uchar *father_key_pos, my_off_t father_page, + byte *key, byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_key_pos, my_off_t father_page, my_bool insert_last) { uint a_length,nod_flag; int t_length; - uchar *endpos, *prev_key; + byte *endpos, *prev_key; MARIA_KEY_PARAM s_temp; DBUG_ENTER("_ma_insert"); - DBUG_PRINT("enter",("key_pos: %lx",key_pos)); + DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos)); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key, USE_WHOLE_KEY);); nod_flag=_ma_test_if_nod(anc_buff); a_length=maria_getint(anc_buff); endpos= anc_buff+ a_length; - prev_key=(key_pos == anc_buff+2+nod_flag ? (uchar*) 0 : key_buff); + prev_key=(key_pos == anc_buff+2+nod_flag ? (byte*) 0 : key_buff); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, - (key_pos == endpos ? (uchar*) 0 : key_pos), + (key_pos == endpos ? (byte*) 0 : key_pos), prev_key, prev_key, key,&s_temp); #ifndef DBUG_OFF @@ -478,7 +516,7 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, { DBUG_PRINT("test",("t_length: %d ref_len: %d", t_length,s_temp.ref_length)); - DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: %lx", + DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx", s_temp.n_ref_length,s_temp.n_length,s_temp.key)); } #endif @@ -517,19 +555,20 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, Let's consider converting. We'll compare 'key' and the first key at anc_buff */ - uchar *a=key, *b=anc_buff+2+nod_flag; + byte *a=key, *b=anc_buff+2+nod_flag; uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; /* the very first key on the page is always unpacked */ DBUG_ASSERT((*b & 128) == 0); #if HA_FT_MAXLEN >= 127 blen= mi_uint2korr(b); b+=2; #else - blen= *b++; + blen= *(uchar*) b++; #endif get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) + ha_compare_text(keyinfo->seg->charset, (uchar*) a, alen, + (uchar*) b, blen, 0, 0) == 0) { /* yup. converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) @@ -570,11 +609,11 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, /* split a full page in two and assign emerging item to key */ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, - uchar *key, uchar *buff, uchar *key_buff, + byte *key, byte *buff, byte *key_buff, my_bool insert_last_key) { uint length,a_length,key_ref_length,t_length,nod_flag,key_length; - uchar *key_pos,*pos, *after_key; + byte *key_pos,*pos, *after_key; my_off_t new_pos; MARIA_KEY_PARAM s_temp; DBUG_ENTER("maria_split_page"); @@ -582,7 +621,7 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (info->s->keyinfo+info->lastinx == keyinfo) info->page_changed=1; /* Info->buff is used */ - info->buff_used=1; + info->keybuff_used=1; nod_flag=_ma_test_if_nod(buff); key_ref_length=2+nod_flag; if (insert_last_key) @@ -614,8 +653,8 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff)) DBUG_RETURN(-1); - t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0, - (uchar*) 0, (uchar*) 0, + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte *) 0, + (byte*) 0, (byte*) 0, key_buff, &s_temp); length=(uint) ((buff+a_length)-key_pos); memcpy((byte*) info->buff+key_ref_length+t_length,(byte*) key_pos, @@ -638,12 +677,12 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, after_key will contain the position to where the next key starts */ -uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint *return_key_length, - uchar **after_key) +byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) { uint keys,length,key_ref_length; - uchar *end,*lastpos; + byte *end,*lastpos; DBUG_ENTER("_ma_find_half_pos"); key_ref_length=2+nod_flag; @@ -672,24 +711,25 @@ uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, uchar *page, } while (page < end); *return_key_length=length; *after_key=page; - DBUG_PRINT("exit",("returns: %lx page: %lx half: %lx",lastpos,page,end)); + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx", + lastpos, page, end)); DBUG_RETURN(lastpos); } /* _ma_find_half_pos */ - /* - Split buffer at last key - Returns pointer to the start of the key before the last key - key will contain the last key - */ +/* + Split buffer at last key + Returns pointer to the start of the key before the last key + key will contain the last key +*/ -static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, - uchar *key, uint *return_key_length, - uchar **after_key) +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) { uint keys,length,last_length,key_ref_length; - uchar *end,*lastpos,*prevpos; - uchar key_buff[HA_MAX_KEY_BUFF]; + byte *end,*lastpos,*prevpos; + byte key_buff[HA_MAX_KEY_BUFF]; DBUG_ENTER("_ma_find_last_pos"); key_ref_length=2; @@ -727,7 +767,8 @@ static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, } *return_key_length=last_length; *after_key=lastpos; - DBUG_PRINT("exit",("returns: %lx page: %lx end: %lx",prevpos,page,end)); + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx", + prevpos, page, end)); DBUG_RETURN(prevpos); } /* _ma_find_last_pos */ @@ -736,14 +777,14 @@ static uchar *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, uchar *page, /* returns 0 if balance was done */ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uchar *curr_buff, uchar *father_buff, - uchar *father_key_pos, my_off_t father_page) + byte *key, byte *curr_buff, byte *father_buff, + byte *father_key_pos, my_off_t father_page) { my_bool right; uint k_length,father_length,father_keylength,nod_flag,curr_keylength, right_length,left_length,new_right_length,new_left_length,extra_length, length,keys; - uchar *pos,*buff,*extra_buff; + byte *pos,*buff,*extra_buff; my_off_t next_page,new_pos; byte tmp_part_key[HA_MAX_KEY_BUFF]; DBUG_ENTER("_ma_balance_page"); @@ -880,7 +921,8 @@ typedef struct { uint keynr; } bulk_insert_param; -int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key, + +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, byte *key, uint key_length) { int error; @@ -896,22 +938,22 @@ int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key, /* typeof(_ma_keys_compare)=qsort_cmp2 */ -static int keys_compare(bulk_insert_param *param, uchar *key1, uchar *key2) +static int keys_compare(bulk_insert_param *param, byte *key1, byte *key2) { uint not_used[2]; return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg, - key1, key2, USE_WHOLE_KEY, SEARCH_SAME, + (uchar*) key1, (uchar*) key2, USE_WHOLE_KEY, SEARCH_SAME, not_used); } -static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) +static int keys_free(byte *key, TREE_FREE mode, bulk_insert_param *param) { /* Probably I can use info->lastkey here, but I'm not sure, and to be safe I'd better use local lastkey. */ - uchar lastkey[HA_MAX_KEY_BUFF]; + byte lastkey[HA_MAX_KEY_BUFF]; uint keylen; MARIA_KEYDEF *keyinfo; diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index e423a3f5c36..8ea00fa9776 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -36,21 +36,23 @@ SET_STACK_SIZE(9000) /* Minimum stack size for program */ static uint decode_bits; static char **default_argv; -static const char *load_default_groups[]= { "mariachk", 0 }; +static const char *load_default_groups[]= { "maria_chk", 0 }; static const char *set_collation_name, *opt_tmpdir; static CHARSET_INFO *set_collation; static long opt_maria_block_size; static long opt_key_cache_block_size; static const char *my_progname_short; static int stopwords_inited= 0; -static MY_TMPDIR mariachk_tmpdir; +static MY_TMPDIR maria_chk_tmpdir; static const char *type_names[]= -{ "impossible","char","binary", "short", "long", "float", +{ + "impossible","char","binary", "short", "long", "float", "double","number","unsigned short", "unsigned long","longlong","ulonglong","int24", "uint24","int8","varchar", "varbin","?", - "?"}; + "?" +}; static const char *prefix_packed_txt="packed ", *bin_packed_txt="prefix ", @@ -59,23 +61,30 @@ static const char *prefix_packed_txt="packed ", *blob_txt="BLOB "; static const char *field_pack[]= -{"","no endspace", "no prespace", +{ + "","no endspace", "no prespace", "no zeros", "blob", "constant", "table-lockup", - "always zero","varchar","unique-hash","?","?"}; + "always zero","varchar","unique-hash","?","?" +}; + +static const char *record_formats[]= +{ + "Fixed length", "Packed", "Compressed", "Block", "?" +}; static const char *maria_stats_method_str="nulls_unequal"; static void get_options(int *argc,char * * *argv); static void print_version(void); static void usage(void); -static int mariachk(HA_CHECK *param, char *filename); +static int maria_chk(HA_CHECK *param, char *filename); static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name); static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info, - my_string name, uint sort_key, - my_bool write_info, my_bool update_index); + my_string name, uint sort_key, + my_bool write_info, my_bool update_index); static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page,uchar *buff,uint sortkey, + my_off_t page, byte *buff,uint sortkey, File new_file, my_bool update_index); HA_CHECK check_param; @@ -88,7 +97,7 @@ int main(int argc, char **argv) MY_INIT(argv[0]); my_progname_short= my_progname+dirname_length(my_progname); - mariachk_init(&check_param); + maria_chk_init(&check_param); check_param.opt_lock_memory= 1; /* Lock memory if possible */ check_param.using_global_keycache = 0; get_options(&argc,(char***) &argv); @@ -98,7 +107,7 @@ int main(int argc, char **argv) while (--argc >= 0) { - int new_error=mariachk(&check_param, *(argv++)); + int new_error=maria_chk(&check_param, *(argv++)); if ((check_param.testflag & T_REP_ANY) != T_REP) check_param.testflag&= ~T_REP; VOID(fflush(stdout)); @@ -112,7 +121,7 @@ int main(int argc, char **argv) if (!(check_param.testflag & T_REP)) check_param.testflag|= T_REP_BY_SORT; check_param.testflag&= ~T_EXTEND; /* Don't needed */ - error|=mariachk(&check_param, argv[-1]); + error|=maria_chk(&check_param, argv[-1]); check_param.testflag= old_testflag; VOID(fflush(stdout)); VOID(fflush(stderr)); @@ -134,7 +143,7 @@ int main(int argc, char **argv) llstr(check_param.total_deleted,buff2)); } free_defaults(default_argv); - free_tmpdir(&mariachk_tmpdir); + free_tmpdir(&maria_chk_tmpdir); maria_end(); my_end(check_param.testflag & T_INFO ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); @@ -218,7 +227,7 @@ static struct my_option my_long_options[] = (gptr*) &check_param.keys_in_use, 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, {"max-record-length", OPT_MAX_RECORD_LENGTH, - "Skip rows bigger than this if mariachk can't allocate memory to hold it", + "Skip rows bigger than this if maria_chk can't allocate memory to hold it", (gptr*) &check_param.max_record_length, (gptr*) &check_param.max_record_length, 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, @@ -259,7 +268,7 @@ static struct my_option my_long_options[] = "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"silent", 's', - "Only print errors. One can use two -s to make mariachk very silent.", + "Only print errors. One can use two -s to make maria_chk very silent.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"sort-index", 'S', "Sort index blocks. This speeds up 'read-next' in applications.", @@ -346,7 +355,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - printf("%s Ver 2.7 for %s at %s\n", my_progname, SYSTEM_TYPE, + printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE); NETWARE_SET_SCREEN_MODE(1); } @@ -381,7 +390,7 @@ static void usage(void) printf(", they will be used\n\ in a round-robin fashion.\n\ -s, --silent Only print errors. One can use two -s to make\n\ - mariachk very silent.\n\ + maria_chk very silent.\n\ -v, --verbose Print more information. This can be used with\n\ --description and --check. Use many -v for more verbosity.\n\ -V, --version Print version and exit.\n\ @@ -390,10 +399,10 @@ static void usage(void) puts(" --start-check-pos=# Start reading file at given offset.\n"); #endif - puts("Check options (check is the default action for mariachk):\n\ + puts("Check options (check is the default action for maria_chk):\n\ -c, --check Check table for errors.\n\ -e, --extend-check Check the table VERY throughly. Only use this in\n\ - extreme cases as mariachk should normally be able to\n\ + extreme cases as maria_chk should normally be able to\n\ find out if the table is ok even without this switch.\n\ -F, --fast Check only tables that haven't been closed properly.\n\ -C, --check-only-changed\n\ @@ -419,7 +428,7 @@ static void usage(void) bit mask of which keys to use. This can be used to\n\ get faster inserts.\n\ --max-record-length=#\n\ - Skip rows bigger than this if mariachk can't allocate\n\ + Skip rows bigger than this if maria_chk can't allocate\n\ memory to hold it.\n\ -r, --recover Can fix almost anything except unique keys that aren't\n\ unique.\n\ @@ -436,7 +445,7 @@ static void usage(void) --set-collation=name\n\ Change the collation used by the index.\n\ -q, --quick Faster repair by not modifying the data file.\n\ - One can give a second '-q' to force mariachk to\n\ + One can give a second '-q' to force maria_chk to\n\ modify the original datafile in case of duplicate keys.\n\ NOTE: Tables where the data file is currupted can't be\n\ fixed with this option.\n\ @@ -684,7 +693,7 @@ get_one_option(int optid, } else { - DBUG_PUSH(argument ? argument : "d:t:o,/tmp/mariachk.trace"); + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); } break; case 'V': @@ -700,6 +709,7 @@ get_one_option(int optid, { int method; enum_handler_stats_method method_conv; + LINT_INIT(method_conv); maria_stats_method_str= argument; if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0) { @@ -778,10 +788,10 @@ static void get_options(register int *argc,register char ***argv) exit(1); } - if (init_tmpdir(&mariachk_tmpdir, opt_tmpdir)) + if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir)) exit(1); - check_param.tmpdir=&mariachk_tmpdir; + check_param.tmpdir=&maria_chk_tmpdir; check_param.key_cache_block_size= opt_key_cache_block_size; if (set_collation_name) @@ -796,17 +806,16 @@ static void get_options(register int *argc,register char ***argv) /* Check table */ -static int mariachk(HA_CHECK *param, my_string filename) +static int maria_chk(HA_CHECK *param, my_string filename) { int error,lock_type,recreate; int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); - uint raid_chunks; MARIA_HA *info; File datafile; char llbuff[22],llbuff2[22]; my_bool state_updated=0; MARIA_SHARE *share; - DBUG_ENTER("mariachk"); + DBUG_ENTER("maria_chk"); param->out_flag=error=param->warning_printed=param->error_printed= recreate=0; @@ -849,7 +858,8 @@ static int mariachk(HA_CHECK *param, my_string filename) _ma_check_print_error(param,"File '%s' doesn't exist",filename); break; case EACCES: - _ma_check_print_error(param,"You don't have permission to use '%s'",filename); + _ma_check_print_error(param,"You don't have permission to use '%s'", + filename); break; default: _ma_check_print_error(param,"%d when opening MARIA-table '%s'", @@ -862,7 +872,18 @@ static int mariachk(HA_CHECK *param, my_string filename) share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ share->tot_locks-= share->r_locks; share->r_locks=0; - raid_chunks=share->base.raid_chunks; + + if (share->data_file_type == BLOCK_RECORD && + (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | + T_CHECK | T_CHECK_ONLY_CHANGED))) + { + _ma_check_print_error(param, + "Record format used by '%s' is is not yet supported with repair/check", + filename); + param->error_printed= 0; + error= 1; + goto end2; + } /* Skip the checking of the file if: @@ -871,7 +892,8 @@ static int mariachk(HA_CHECK *param, my_string filename) */ if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED)) { - my_bool need_to_check= maria_is_crashed(info) || share->state.open_count != 0; + my_bool need_to_check= (maria_is_crashed(info) || + share->state.open_count != 0); if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | @@ -969,6 +991,7 @@ static int mariachk(HA_CHECK *param, my_string filename) _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d", filename,my_errno); param->error_printed=0; + error= 1; goto end2; } /* @@ -989,7 +1012,8 @@ static int mariachk(HA_CHECK *param, my_string filename) if (tmp != share->state.key_map) info->update|=HA_STATE_CHANGED; } - if (rep_quick && maria_chk_del(param, info, param->testflag & ~T_VERBOSE)) + if (rep_quick && + maria_chk_del(param, info, param->testflag & ~T_VERBOSE)) { if (param->testflag & T_FORCE_CREATE) { @@ -1032,8 +1056,7 @@ static int mariachk(HA_CHECK *param, my_string filename) { /* Change temp file to org file */ VOID(my_close(info->dfile,MYF(MY_WME))); /* Close new file */ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, - raid_chunks, - MYF(0)); + MYF(0)); if (_ma_open_datafile(info,info->s, -1)) error=1; param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */ @@ -1112,8 +1135,7 @@ static int mariachk(HA_CHECK *param, my_string filename) 1, MYF(MY_WME))); maria_lock_memory(param); - if ((info->s->options & (HA_OPTION_PACK_RECORD | - HA_OPTION_COMPRESS_RECORD)) || + if ((info->s->data_file_type != STATIC_RECORD) || (param->testflag & (T_EXTEND | T_MEDIUM))) error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND); error|=_ma_flush_blocks(param, share->key_cache, share->kfile); @@ -1163,12 +1185,11 @@ end2: { if (param->out_flag & O_NEW_DATA) error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, - raid_chunks, - ((param->testflag & T_BACKUP_DATA) ? - MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); + ((param->testflag & T_BACKUP_DATA) ? + MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); if (param->out_flag & O_NEW_INDEX) - error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT,0, - MYF(0)); + error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT, + MYF(0)); } VOID(fflush(stdout)); VOID(fflush(stderr)); if (param->error_printed) @@ -1195,10 +1216,10 @@ end2: filename)); VOID(fflush(stderr)); DBUG_RETURN(error); -} /* mariachk */ +} /* maria_chk */ - /* Write info about table */ +/* Write info about table */ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) { @@ -1212,14 +1233,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) char llbuff[22],llbuff2[22]; DBUG_ENTER("describe"); - printf("\nMARIA file: %s\n",name); - fputs("Record format: ",stdout); - if (share->options & HA_OPTION_COMPRESS_RECORD) - puts("Compressed"); - else if (share->options & HA_OPTION_PACK_RECORD) - puts("Packed"); - else - puts("Fixed length"); + printf("\nMARIA file: %s\n",name); + printf("Record format: %s\n", record_formats[share->data_file_type]); printf("Character set: %s (%d)\n", get_charset_name(share->state.header.language), share->state.header.language); @@ -1260,25 +1275,18 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) printf("Status: %s\n",buff); if (share->base.auto_key) { - printf("Auto increment key: %13d Last value: %13s\n", + printf("Auto increment key: %16d Last value: %18s\n", share->base.auto_key, llstr(share->state.auto_increment,llbuff)); } - if (share->base.raid_type) - { - printf("RAID: Type: %u Chunks: %u Chunksize: %lu\n", - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize); - } if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) - printf("Checksum: %23s\n",llstr(info->state->checksum,llbuff)); + printf("Checksum: %26s\n",llstr(info->state->checksum,llbuff)); ; if (share->options & HA_OPTION_DELAY_KEY_WRITE) printf("Keys are only flushed at close\n"); } - printf("Data records: %13s Deleted blocks: %13s\n", + printf("Data records: %16s Deleted blocks: %18s\n", llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2)); if (param->testflag & T_SILENT) DBUG_VOID_RETURN; /* This is enough */ @@ -1286,14 +1294,14 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) if (param->testflag & T_VERBOSE) { #ifdef USE_RELOC - printf("Init-relocation: %13s\n",llstr(share->base.reloc,llbuff)); + printf("Init-relocation: %16s\n",llstr(share->base.reloc,llbuff)); #endif - printf("Datafile parts: %13s Deleted data: %13s\n", + printf("Datafile parts: %16s Deleted data: %18s\n", llstr(share->state.split,llbuff), llstr(info->state->empty,llbuff2)); - printf("Datafile pointer (bytes):%9d Keyfile pointer (bytes):%9d\n", + printf("Datafile pointer (bytes): %11d Keyfile pointer (bytes): %13d\n", share->rec_reflength,share->base.key_reflength); - printf("Datafile length: %13s Keyfile length: %13s\n", + printf("Datafile length: %16s Keyfile length: %18s\n", llstr(info->state->data_file_length,llbuff), llstr(info->state->key_file_length,llbuff2)); @@ -1303,13 +1311,13 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) { if (share->base.max_data_file_length != HA_OFFSET_ERROR || share->base.max_key_file_length != HA_OFFSET_ERROR) - printf("Max datafile length: %13s Max keyfile length: %13s\n", + printf("Max datafile length: %16s Max keyfile length: %18s\n", llstr(share->base.max_data_file_length-1,llbuff), llstr(share->base.max_key_file_length-1,llbuff2)); } } - - printf("Recordlength: %13d\n",(int) share->base.pack_reclength); + printf("Block_size: %16d\n",(int) share->block_size); + printf("Recordlength: %16d\n",(int) share->base.pack_reclength); if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) { longlong2str(share->state.key_map,buff,2); @@ -1417,7 +1425,7 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) if (share->options & HA_OPTION_COMPRESS_RECORD) printf(" Huff tree Bits"); VOID(putchar('\n')); - start=1; + for (field=0 ; field < share->base.fields ; field++) { if (share->options & HA_OPTION_COMPRESS_RECORD) @@ -1446,8 +1454,9 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) sprintf(null_bit,"%d",share->rec[field].null_bit); sprintf(null_pos,"%d",share->rec[field].null_pos+1); } - printf("%-6d%-6d%-7s%-8s%-8s%-35s",field+1,start,length, - null_pos, null_bit, buff); + printf("%-6d%-6u%-7s%-8s%-8s%-35s",field+1, + (uint) share->rec[field].offset+1, + length, null_pos, null_bit, buff); if (share->options & HA_OPTION_COMPRESS_RECORD) { if (share->rec[field].huff_tree) @@ -1475,7 +1484,7 @@ static int maria_sort_records(HA_CHECK *param, uint key; MARIA_KEYDEF *keyinfo; File new_file; - uchar *temp_buff; + byte *temp_buff; ha_rows old_record_count; MARIA_SHARE *share=info->s; char llbuff[22],llbuff2[22]; @@ -1532,7 +1541,7 @@ static int maria_sort_records(HA_CHECK *param, goto err; info->opt_flag|=WRITE_CACHE_USED; - if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) { _ma_check_print_error(param,"Not enough memory for key block"); goto err; @@ -1544,14 +1553,11 @@ static int maria_sort_records(HA_CHECK *param, goto err; } fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32); - new_file=my_raid_create(fn_format(param->temp_filename, - param->temp_filename,"", - DATA_TMP_EXT,2+4), - 0,param->tmpfile_createflag, - share->base.raid_type, - share->base.raid_chunks, - share->base.raid_chunksize, - MYF(0)); + new_file= my_create(fn_format(param->temp_filename, + param->temp_filename,"", + DATA_TMP_EXT,2+4), + 0,param->tmpfile_createflag, + MYF(0)); if (new_file < 0) { _ma_check_print_error(param,"Can't create new tempfile: '%s'", @@ -1568,7 +1574,7 @@ static int maria_sort_records(HA_CHECK *param, for (key=0 ; key < share->base.keys ; key++) share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME; - if (my_pread(share->kfile,(byte*) temp_buff, + if (my_pread(share->kfile, temp_buff, (uint) keyinfo->block_length, share->state.key_root[sort_key], MYF(MY_NABP+MY_WME))) @@ -1589,7 +1595,8 @@ static int maria_sort_records(HA_CHECK *param, if (sort_info.new_data_file_type != COMPRESSED_RECORD) info->state->checksum=0; - if (sort_record_index(&sort_param,info,keyinfo,share->state.key_root[sort_key], + if (sort_record_index(&sort_param,info,keyinfo, + share->state.key_root[sort_key], temp_buff, sort_key,new_file,update_index) || maria_write_data_suffix(&sort_info,1) || flush_io_cache(&info->rec_cache)) @@ -1626,8 +1633,7 @@ err: { VOID(end_io_cache(&info->rec_cache)); (void) my_close(new_file,MYF(MY_WME)); - (void) my_raid_delete(param->temp_filename, share->base.raid_chunks, - MYF(MY_WME)); + (void) my_delete(param->temp_filename, MYF(MY_WME)); } if (temp_buff) { @@ -1644,17 +1650,17 @@ err: } /* sort_records */ - /* Sort records recursive using one index */ +/* Sort records recursive using one index */ static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, uchar *buff, uint sort_key, + my_off_t page, byte *buff, uint sort_key, File new_file,my_bool update_index) { uint nod_flag,used_length,key_length; - uchar *temp_buff,*keypos,*endpos; + byte *temp_buff,*keypos,*endpos; my_off_t next_page,rec_pos; - uchar lastkey[HA_MAX_KEY_BUFF]; + byte lastkey[HA_MAX_KEY_BUFF]; char llbuff[22]; MARIA_SORT_INFO *sort_info= sort_param->sort_info; HA_CHECK *param=sort_info->param; @@ -1665,7 +1671,7 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, if (nod_flag) { - if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length))) + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length))) { _ma_check_print_error(param,"Not Enough memory"); DBUG_RETURN(-1); @@ -1679,7 +1685,7 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, _sanity(__FILE__,__LINE__); if (nod_flag) { - next_page= _ma_kpos(nod_flag,keypos); + next_page= _ma_kpos(nod_flag, keypos); if (my_pread(info->s->kfile,(byte*) temp_buff, (uint) keyinfo->block_length, next_page, MYF(MY_NABP+MY_WME))) @@ -1688,7 +1694,8 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, llstr(next_page,llbuff)); goto err; } - if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff,sort_key, + if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff, + sort_key, new_file, update_index)) goto err; } @@ -1699,7 +1706,7 @@ static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, break; rec_pos= _ma_dpos(info,0,lastkey+key_length); - if ((*info->s->read_rnd)(info,sort_param->record,rec_pos,0)) + if ((*info->s->read_record)(info,sort_param->record,rec_pos)) { _ma_check_print_error(param,"%d when reading datafile",my_errno); goto err; @@ -1738,7 +1745,7 @@ err: /* - Check if mariachk was killed by a signal + Check if maria_chk was killed by a signal This is overloaded by other programs that want to be able to abort sorting */ diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index ecd93807a06..778c5817e4f 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -55,14 +55,15 @@ typedef struct st_maria_state_info uchar keys; /* number of keys in file */ uchar uniques; /* number of UNIQUE definitions */ uchar language; /* Language for indexes */ - uchar max_block_size_index; /* max keyblock size */ uchar fulltext_keys; - uchar not_used; /* To align to 8 */ + uchar data_file_type; + uchar org_data_file_type; /* Used by mariapack to store dft */ } header; MARIA_STATUS_INFO state; ha_rows split; /* number of split blocks */ my_off_t dellink; /* Link to next removed block */ + ulonglong first_bitmap_with_space; ulonglong auto_increment; ulong process; /* process that updated table last */ ulong unique; /* Unique number for this process */ @@ -70,7 +71,7 @@ typedef struct st_maria_state_info ulong status; ulong *rec_per_key_part; my_off_t *key_root; /* Start of key trees */ - my_off_t *key_del; /* delete links for trees */ + my_off_t key_del; /* delete links for trees */ my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ ulong sec_index_changed; /* Updated when new sec_index */ @@ -91,60 +92,73 @@ typedef struct st_maria_state_info } MARIA_STATE_INFO; -#define MARIA_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) +#define MARIA_STATE_INFO_SIZE (24 + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8) #define MARIA_STATE_KEY_SIZE 8 #define MARIA_STATE_KEYBLOCK_SIZE 8 #define MARIA_STATE_KEYSEG_SIZE 4 -#define MARIA_STATE_EXTRA_SIZE ((MARIA_MAX_KEY+MARIA_MAX_KEY_BLOCK_SIZE)*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE) +#define MARIA_STATE_EXTRA_SIZE (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE) #define MARIA_KEYDEF_SIZE (2+ 5*2) #define MARIA_UNIQUEDEF_SIZE (2+1+1) #define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) -#define MARIA_COLUMNDEF_SIZE (2*3+1) -#define MARIA_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) +#define MARIA_COLUMNDEF_SIZE (6+2+2+2+2+2+1+1) +#define MARIA_BASE_INFO_SIZE (5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16) #define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ -typedef struct st__ma_base_info +typedef struct st_ma_base_info { - my_off_t keystart; /* Start of keys */ + my_off_t keystart; /* Start of keys */ my_off_t max_data_file_length; my_off_t max_key_file_length; my_off_t margin_key_file_length; - ha_rows records, reloc; /* Create information */ - ulong mean_row_length; /* Create information */ - ulong reclength; /* length of unpacked record */ - ulong pack_reclength; /* Length of full packed rec */ + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec */ ulong min_pack_length; - ulong max_pack_length; /* Max possibly length of - packed rec. */ + ulong max_pack_length; /* Max possibly length of packed rec */ ulong min_block_length; - ulong fields, /* fields in table */ - pack_fields; /* packed fields in table */ - uint rec_reflength; /* = 2-8 */ - uint key_reflength; /* = 2-8 */ - uint keys; /* same as in state.header */ - uint auto_key; /* Which key-1 is a auto key */ - uint blobs; /* Number of blobs */ - uint pack_bits; /* Length of packed bits */ - uint max_key_block_length; /* Max block length */ - uint max_key_length; /* Max key length */ + uint fields; /* fields in table */ + uint fixed_not_null_fields; + uint fixed_not_null_fields_length; + uint max_field_lengths; + uint pack_fields; /* packed fields in table */ + uint varlength_fields; /* char/varchar/blobs */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + /* Length of packed bits (when table was created first time) */ + uint pack_bytes; + /* Length of null bits (when table was created first time) */ + uint original_null_bytes; + uint null_bytes; /* Null bytes in record */ + uint field_offsets; /* Number of field offsets */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ /* Extra allocation when using dynamic record format */ uint extra_alloc_bytes; uint extra_alloc_procent; - /* Info about raid */ - uint raid_type, raid_chunks; - ulong raid_chunksize; + uint is_nulls_extended; /* 1 if new null bytes */ + uint min_row_length; + uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */ + uint block_size; + uint default_rec_buff_size; + uint extra_rec_buff_size; + /* The following are from the header */ uint key_parts, all_key_parts; + my_bool transactional; } MARIA_BASE_INFO; - /* Structs used intern in database */ +/* Structs used intern in database */ -typedef struct st_maria_blob /* Info of record */ +typedef struct st_maria_blob /* Info of record */ { - ulong offset; /* Offset to blob in record */ - uint pack_length; /* Type of packed length */ - ulong length; /* Calc:ed for each record */ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ } MARIA_BLOB; @@ -155,6 +169,26 @@ typedef struct st_maria_pack uchar version; } MARIA_PACK; +typedef struct st_maria_file_bitmap +{ + uchar *map; + ulonglong page; /* Page number for current bitmap */ + uint used_size; /* Size of bitmap that is not 0 */ + File file; + + my_bool changed; + +#ifdef THREAD + pthread_mutex_t bitmap_lock; +#endif + /* Constants, allocated when initiating bitmaps */ + uint sizes[8]; /* Size per bit combination */ + uint total_size; /* Total usable size of bitmap page */ + uint block_size; /* Block size of file */ + ulong pages_covered; /* Pages covered by bitmap + 1 */ +} MARIA_FILE_BITMAP; + + #define MAX_NONMAPPED_INSERTS 1000 typedef struct st_maria_share @@ -175,28 +209,36 @@ typedef struct st_maria_share symlinks */ *index_file_name; byte *file_map; /* mem-map of file if possible */ - KEY_CACHE *key_cache; /* ref to the current key cache - */ + KEY_CACHE *key_cache; /* ref to the current key cache */ MARIA_DECODE_TREE *decode_trees; uint16 *decode_tables; - int(*read_record) (struct st_maria_info *, my_off_t, byte *); - int(*write_record) (struct st_maria_info *, const byte *); - int(*update_record) (struct st_maria_info *, my_off_t, const byte *); - int(*delete_record) (struct st_maria_info *); - int(*read_rnd) (struct st_maria_info *, byte *, my_off_t, my_bool); - int(*compare_record) (struct st_maria_info *, const byte *); - ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); - int(*compare_unique) (struct st_maria_info *, MARIA_UNIQUEDEF *, - const byte *record, my_off_t pos); - uint(*file_read) (MARIA_HA *, byte *, uint, my_off_t, myf); - uint(*file_write) (MARIA_HA *, byte *, uint, my_off_t, myf); + my_bool (*once_init)(struct st_maria_share *, File); + my_bool (*once_end)(struct st_maria_share *); + my_bool (*init)(struct st_maria_info *); + void (*end)(struct st_maria_info *); + int (*read_record)(struct st_maria_info *, byte *, MARIA_RECORD_POS); + my_bool (*scan_init)(struct st_maria_info *); + int (*scan)(struct st_maria_info *, byte *, MARIA_RECORD_POS, my_bool); + void (*scan_end)(struct st_maria_info *); + MARIA_RECORD_POS (*write_record_init)(struct st_maria_info *, const byte *); + my_bool (*write_record)(struct st_maria_info *, const byte *); + my_bool (*write_record_abort)(struct st_maria_info *); + my_bool (*update_record)(struct st_maria_info *, MARIA_RECORD_POS, + const byte *); + my_bool (*delete_record)(struct st_maria_info *); + my_bool (*compare_record)(struct st_maria_info *, const byte *); + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); + ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *); + my_bool (*compare_unique) (struct st_maria_info *, MARIA_UNIQUEDEF *, + const byte *record, MARIA_RECORD_POS pos); + uint(*file_read) (MARIA_HA *, byte *, uint, my_off_t, myf); + uint(*file_write) (MARIA_HA *, byte *, uint, my_off_t, myf); invalidator_by_filename invalidator; /* query cache invalidator */ ulong this_process; /* processid */ ulong last_process; /* For table-change-check */ ulong last_version; /* Version on start */ ulong options; /* Options used */ - ulong min_pack_length; /* Theese are used by packed - data */ + ulong min_pack_length; /* These are used by packed data */ ulong max_pack_length; ulong state_diff_length; uint rec_reflength; /* rec_reflength in use now */ @@ -208,21 +250,24 @@ typedef struct st_maria_share int mode; /* mode of file on open */ uint reopen; /* How many times reopened */ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ - uint blocksize; /* blocksize of keyfile */ + uint block_size; /* block_size of keyfile & data file*/ + uint base_length; myf write_flag; enum data_file_type data_file_type; + my_bool temporary; my_bool changed, /* If changed since lock */ global_changed, /* If changed since open */ - not_flushed, temporary, delay_key_write, concurrent_insert; + not_flushed, concurrent_insert; + my_bool delay_key_write; #ifdef THREAD THR_LOCK lock; - pthread_mutex_t intern_lock; /* Locking for use with - _locking */ + pthread_mutex_t intern_lock; /* Locking for use with _locking */ rw_lock_t *key_root_lock; #endif my_off_t mmaped_length; uint nonmmaped_inserts; /* counter of writing in non-mmaped area */ + MARIA_FILE_BITMAP bitmap; rw_lock_t mmap_lock; } MARIA_SHARE; @@ -237,45 +282,106 @@ typedef struct st_maria_bit_buff uint error; } MARIA_BIT_BUFF; +typedef byte MARIA_BITMAP_BUFFER; + +typedef struct st_maria_bitmap_block +{ + ulonglong page; /* Page number */ + /* Number of continuous pages. TAIL_BIT is set if this is a tail page */ + uint page_count; + uint empty_space; /* Set for head and tail pages */ + /* + Number of BLOCKS for block-region (holds all non-blob-fields or one blob) + */ + uint sub_blocks; + /* set to <> 0 in write_record() if this block was actually used */ + uint8 used; + uint8 org_bitmap_value; +} MARIA_BITMAP_BLOCK; + + +typedef struct st_maria_bitmap_blocks +{ + MARIA_BITMAP_BLOCK *block; + uint count; + my_bool tail_page_skipped; /* If some tail pages was not used */ + my_bool page_skipped; /* If some full pages was not used */ +} MARIA_BITMAP_BLOCKS; + + +/* Data about the currently read row */ +typedef struct st_maria_row +{ + MARIA_BITMAP_BLOCKS insert_blocks; + MARIA_BITMAP_BUFFER *extents; + MARIA_RECORD_POS lastpos, nextpos; + MARIA_RECORD_POS *tail_positions; + ha_checksum checksum; + byte *empty_bits, *field_lengths; + byte *empty_bits_buffer; /* For storing cur_row.empty_bits */ + uint *null_field_lengths; /* All null field lengths */ + ulong *blob_lengths; /* Length for each blob */ + ulong base_length, normal_length, char_length, varchar_length, blob_length; + ulong head_length, total_length; + my_size_t extents_buffer_length; /* Size of 'extents' buffer */ + uint field_lengths_length; /* Length of data in field_lengths */ + uint extents_count; /* number of extents in 'extents' */ + uint full_page_count, tail_count; /* For maria_chk */ +} MARIA_ROW; + +/* Data to scan row in blocked format */ +typedef struct st_maria_block_scan +{ + byte *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff; + byte *dir, *dir_end; + ulong bitmap_page; + ulonglong bits; + uint number_of_rows, bit_pos; + MARIA_RECORD_POS row_base_page; +} MARIA_BLOCK_SCAN; + + struct st_maria_info { MARIA_SHARE *s; /* Shared between open:s */ MARIA_STATUS_INFO *state, save_state; + MARIA_ROW cur_row, new_row; + MARIA_BLOCK_SCAN scan; MARIA_BLOB *blobs; /* Pointer to blobs */ MARIA_BIT_BUFF bit_buff; + DYNAMIC_ARRAY bitmap_blocks; /* accumulate indexfile changes between write's */ TREE *bulk_insert; DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ MEM_ROOT ft_memroot; /* used by the parser */ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ char *filename; /* parameter to open filename */ - uchar *buff, /* Temp area for key */ - *lastkey, *lastkey2; /* Last used search key */ - uchar *first_mbr_key; /* Searhed spatial key */ - byte *rec_buff; /* Tempbuff for recordpack */ - uchar *int_keypos, /* Save position for next/previous */ + byte *buff; /* page buffer */ + byte *keyread_buff; /* Buffer for last key read */ + byte *lastkey, *lastkey2; /* Last used search key */ + byte *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Temp buffer for recordpack */ + byte *int_keypos, /* Save position for next/previous */ *int_maxpos; /* -""- */ uint int_nod_flag; /* -""- */ uint32 int_keytree_version; /* -""- */ - int(*read_record) (struct st_maria_info *, my_off_t, byte *); + int (*read_record) (struct st_maria_info *, byte*, MARIA_RECORD_POS); invalidator_by_filename invalidator; /* query cache invalidator */ ulong this_unique; /* uniq filenumber or thread */ ulong last_unique; /* last unique number */ ulong this_loop; /* counter for this open */ ulong last_loop; /* last used counter */ - my_off_t lastpos, /* Last record position */ - nextpos; /* Position to next record */ - my_off_t save_lastpos; + MARIA_RECORD_POS save_lastpos; + MARIA_RECORD_POS dup_key_pos; my_off_t pos; /* Intern variable */ my_off_t last_keypage; /* Last key page read */ my_off_t last_search_keypage; /* Last keypage when searching */ - my_off_t dupp_key_pos; - ha_checksum checksum; /* QQ: the folloing two xxx_length fields should be removed, as they are not compatible with parallel repair */ ulong packed_length, blob_length; /* Length of found, packed record */ + my_size_t rec_buff_size; int dfile; /* The datafile */ uint opt_flag; /* Optim. for space/speed */ uint update; /* If file changed since open */ @@ -298,19 +404,19 @@ struct st_maria_info my_bool was_locked; /* Was locked in panic */ my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; - /* If info->buff can't be used for rnext */ + /* If info->keyread_buff can't be used for rnext */ my_bool page_changed; - /* If info->buff has to be reread for rnext */ - my_bool buff_used; - my_bool once_flags; /* For MARIAMRG */ + /* If info->keyread_buff has to be reread for rnext */ + my_bool keybuff_used; + my_bool once_flags; /* For MARIA_MRG */ #ifdef THREAD THR_LOCK_DATA lock; #endif - uchar *maria_rtree_recursion_state; /* For RTREE */ + uchar *maria_rtree_recursion_state; /* For RTREE */ int maria_rtree_recursion_depth; }; -/* Some defines used by isam-funktions */ +/* Some defines used by maria-functions */ #define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _search() */ #define F_EXTRA_LCK -1 @@ -361,15 +467,15 @@ struct st_maria_info } #define get_key_full_length(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= ((uint) (uchar) *((key)++))+1; \ + { if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)++))+1; \ else \ { length=mi_uint2korr((key)+1)+3; (key)+=3; } \ } #define get_key_full_length_rdonly(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= ((uint) (uchar) *((key)))+1; \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)))+1; \ else \ { length=mi_uint2korr((key)+1)+3; } \ } @@ -398,7 +504,6 @@ struct st_maria_info #define PACK_TYPE_ZERO_FILL 4 #define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ -#define MARIA_MAX_KEY_BLOCK_SIZE (MARIA_MAX_KEY_BLOCK_LENGTH/MARIA_MIN_KEY_BLOCK_LENGTH) #define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) #define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ #define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ @@ -428,85 +533,88 @@ extern LIST *maria_open_list; extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; extern uint maria_quick_table_bits; +extern byte maria_zero_string[]; /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ typedef struct st_maria_s_param { - uint ref_length, key_length, - n_ref_length, - n_length, totlength, part_of_prev_key, prev_length, pack_marker; - uchar *key, *prev_key, *next_key_pos; + uint ref_length, key_length, n_ref_length; + uint n_length, totlength, part_of_prev_key, prev_length, pack_marker; + const byte *key; + byte *prev_key, *next_key_pos; bool store_not_null; } MARIA_KEY_PARAM; /* Prototypes for intern functions */ -extern int _ma_read_dynamic_record(MARIA_HA *info, my_off_t filepos, - byte *buf); -extern int _ma_write_dynamic_record(MARIA_HA *, const byte *); -extern int _ma_update_dynamic_record(MARIA_HA *, my_off_t, const byte *); -extern int _ma_delete_dynamic_record(MARIA_HA *info); -extern int _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record); -extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, my_off_t, +extern int _ma_read_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS, my_bool); -extern int _ma_write_blob_record(MARIA_HA *, const byte *); -extern int _ma_update_blob_record(MARIA_HA *, my_off_t, const byte *); -extern int _ma_read_static_record(MARIA_HA *info, my_off_t filepos, - byte *buf); -extern int _ma_write_static_record(MARIA_HA *, const byte *); -extern int _ma_update_static_record(MARIA_HA *, my_off_t, const byte *); -extern int _ma_delete_static_record(MARIA_HA *info); -extern int _ma_cmp_static_record(MARIA_HA *info, const byte *record); -extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, my_off_t, my_bool); -extern int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, +extern my_bool _ma_write_dynamic_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern my_bool _ma_delete_dynamic_record(MARIA_HA *info); +extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record); +extern my_bool _ma_write_blob_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern int _ma_read_static_record(MARIA_HA *info, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern my_bool _ma_write_static_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern my_bool _ma_delete_static_record(MARIA_HA *info); +extern my_bool _ma_cmp_static_record(MARIA_HA *info, const byte *record); +extern int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, uint length); extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, - my_off_t *root, uint comp_flag); + byte *key, uint key_length, + MARIA_RECORD_POS *root, uint comp_flag); extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, my_off_t *root); -extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, - uchar *anc_buff, uchar *key_pos, uchar *key_buff, - uchar *father_buff, uchar *father_keypos, + byte *key, MARIA_RECORD_POS *root); +extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_keypos, my_off_t father_page, my_bool insert_last); extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uchar *buff, uchar *key_buff, + byte *key, byte *buff, byte *key_buff, my_bool insert_last); -extern uchar *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, - uchar *page, uchar *key, +extern byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint *return_key_length, - uchar ** after_key); + byte ** after_key); extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, uchar *key, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, MARIA_KEY_PARAM *s_temp); extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, uchar *key, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, MARIA_KEY_PARAM *s_temp); extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, - uint nod_flag, uchar *key_pos, - uchar *org_key, uchar *prev_key, - uchar *key, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, MARIA_KEY_PARAM *s_temp); extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, - uint nod_flag, uchar *key_pos, - uchar *org_key, uchar *prev_key, - uchar *key, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, MARIA_KEY_PARAM *s_temp); -void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, +void _ma_store_static_key(MARIA_KEYDEF *keyinfo, byte *key_pos, MARIA_KEY_PARAM *s_temp); -void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, MARIA_KEY_PARAM *s_temp); #ifdef NOT_USED -void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, +void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, MARIA_KEY_PARAM *s_temp); #endif -void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos, +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, MARIA_KEY_PARAM *s_temp); -extern int _ma_ck_delete(MARIA_HA *info, uint keynr, uchar *key, +extern int _ma_ck_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length); extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer); extern int _ma_writeinfo(MARIA_HA *info, uint options); @@ -514,74 +622,69 @@ extern int _ma_test_if_changed(MARIA_HA *info); extern int _ma_mark_file_changed(MARIA_HA *info); extern int _ma_decrement_open_count(MARIA_HA *info); extern int _ma_check_index(MARIA_HA *info, int inx); -extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key, +extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, uint key_len, uint nextflag, my_off_t pos); extern int _ma_bin_search(struct st_maria_info *info, MARIA_KEYDEF *keyinfo, - uchar *page, uchar *key, uint key_len, - uint comp_flag, uchar **ret_pos, uchar *buff, + byte *page, byte *key, uint key_len, + uint comp_flag, byte **ret_pos, byte *buff, my_bool *was_last_key); extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *page, uchar *key, uint key_len, - uint comp_flag, uchar ** ret_pos, uchar *buff, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, my_bool *was_last_key); extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *page, uchar *key, uint key_len, - uint comp_flag, uchar ** ret_pos, uchar *buff, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, my_bool *was_last_key); -extern my_off_t _ma_kpos(uint nod_flag, uchar *after_key); -extern void _ma_kpointer(MARIA_HA *info, uchar *buff, my_off_t pos); -extern my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, uchar *after_key); -extern my_off_t _ma_rec_pos(MARIA_SHARE *info, uchar *ptr); -extern void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos); +extern my_off_t _ma_kpos(uint nod_flag, byte *after_key); +extern void _ma_kpointer(MARIA_HA *info, byte *buff, my_off_t pos); +extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag, + const byte *after_key); +extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *info, byte *ptr); +extern void _ma_dpointer(MARIA_HA *info, byte *buff, MARIA_RECORD_POS pos); extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar **page, uchar *key); + byte **page, byte *key); extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar **page, uchar *key); + byte **page, byte *key); extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, - uchar ** page_pos, uchar *key); -extern uchar *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *keypos, uchar *lastkey, - uchar *endpos, uint *return_key_length); -extern uchar *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *page, uchar *key, uchar *keypos, + byte ** page_pos, byte *key); +extern byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *keypos, byte *lastkey, + byte *endpos, uint *return_key_length); +extern byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, uint *return_key_length); -extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, uchar *key); -extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register uchar *key, +extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const byte *key); +extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, HA_KEYSEG *end); -extern uchar *_ma_move_key(MARIA_KEYDEF *keyinfo, uchar *to, uchar *from); +extern byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from); extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - uchar *key, uint key_length, uint nextflag, + byte *key, uint key_length, uint nextflag, my_off_t pos); extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos); extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos); -extern uchar *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, int level, uchar *buff, +extern byte *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff, int return_buffer); extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, - my_off_t page, int level, uchar *buff); + my_off_t page, int level, byte *buff); extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, int level); extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level); -extern uint _ma_make_key(MARIA_HA *info, uint keynr, uchar *key, - const byte *record, my_off_t filepos); -extern uint _ma_pack_key(MARIA_HA *info, uint keynr, uchar *key, - uchar *old, uint key_length, +extern uint _ma_make_key(MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos); +extern uint _ma_pack_key(MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint key_length, HA_KEYSEG ** last_used_keyseg); -extern int _ma_read_key_record(MARIA_HA *info, my_off_t filepos, - byte *buf); -extern int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, +extern int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS); +extern int _ma_read_cache(IO_CACHE *info, byte *buff, MARIA_RECORD_POS pos, uint length, int re_read_if_possibly); extern ulonglong ma_retrieve_auto_increment(MARIA_HA *info, const byte *record); -extern byte *_ma_alloc_rec_buff(MARIA_HA *, ulong, byte **); -#define _ma_get_rec_buff_ptr(info,buf) \ - ((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \ - (buf) - MARIA_REC_BUFF_OFFSET : (buf)) -#define _ma_get_rec_buff_len(info,buf) \ - (*((uint32 *)(_ma_get_rec_buff_ptr(info,buf)))) - +extern my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size); extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, ulong reclength); extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, @@ -592,11 +695,13 @@ extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, byte ** record, ulong *reclength, int *flag); extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg, - const uchar *key, uint length); -extern my_bool _ma_read_pack_info(MARIA_HA *info, pbool fix_keys); -extern int _ma_read_pack_record(MARIA_HA *info, my_off_t filepos, - byte *buf); -extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, my_off_t, my_bool); + const byte *key, uint length); +extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile); +extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share); +extern int _ma_read_pack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); extern int _ma_pack_rec_unpack(MARIA_HA *info, byte *to, byte *from, ulong reclength); extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b); @@ -613,9 +718,9 @@ typedef struct st_maria_block_info ulong data_len; ulong block_len; ulong blob_len; - my_off_t filepos; - my_off_t next_filepos; - my_off_t prev_filepos; + MARIA_RECORD_POS filepos; + MARIA_RECORD_POS next_filepos; + MARIA_RECORD_POS prev_filepos; uint second_read; uint offset; } MARIA_BLOCK_INFO; @@ -672,11 +777,10 @@ extern uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, uint Count, my_off_t offset, myf MyFlags); uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite); -uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state); +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state); uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead); uint _ma_base_info_write(File file, MARIA_BASE_INFO *base); -uchar *_ma_n_base_info_read(uchar *ptr, MARIA_BASE_INFO *base); int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg); char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg); uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef); @@ -690,14 +794,14 @@ ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf); ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *buf); my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, ha_checksum unique_hash, - my_off_t pos); + MARIA_RECORD_POS pos); ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *buf); -int _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, - const byte *record, my_off_t pos); -int _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, - const byte *record, my_off_t pos); -int _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, - my_bool null_are_equal); +my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal); void _ma_get_status(void *param, int concurrent_insert); void _ma_update_status(void *param); void _ma_copy_status(void *to, void *from); @@ -711,6 +815,9 @@ void _ma_setup_functions(register MARIA_SHARE *share); my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size); void _ma_remap_file(MARIA_HA *info, my_off_t size); +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_default(MARIA_HA *info); + /* Functions needed by _ma_check (are overrided in MySQL) */ C_MODE_START volatile int *_ma_killed_ptr(HA_CHECK *param); diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c index b840072aed0..0faa64327eb 100644 --- a/storage/maria/maria_ftdump.c +++ b/storage/maria/maria_ftdump.c @@ -106,7 +106,7 @@ int main(int argc,char *argv[]) maria_lock_database(info, F_EXTRA_LCK); - info->lastpos= HA_OFFSET_ERROR; + info->cur_row.lastpos= HA_OFFSET_ERROR; info->update|= HA_STATE_PREV_FOUND; while (!(error=maria_rnext(info,NULL,inx))) @@ -157,9 +157,9 @@ int main(int argc,char *argv[]) if (dump) { if (subkeys>=0) - printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); + printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf); else - printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); + printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys,buf); } if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) printf("%10ld\r",total); diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c index c5a53b1ffac..e4e38cb6e9d 100644 --- a/storage/maria/maria_pack.c +++ b/storage/maria/maria_pack.c @@ -239,7 +239,7 @@ int main(int argc, char **argv) } } if (ok && isamchk_neaded && !silent) - puts("Remember to run mariachk -rq on compressed tables"); + puts("Remember to run maria_chk -rq on compressed tables"); VOID(fflush(stdout)); VOID(fflush(stderr)); free_defaults(default_argv); @@ -294,7 +294,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - VOID(printf("%s Ver 1.23 for %s on %s\n", + VOID(printf("%s Ver 1.0 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -308,7 +308,7 @@ static void usage(void) puts("and you are welcome to modify and redistribute it under the GPL license\n"); puts("Pack a MARIA-table to take much less space."); - puts("Keys are not updated, you must run mariachk -rq on the datafile"); + puts("Keys are not updated, you must run maria_chk -rq on the datafile"); puts("afterwards to update the keys."); puts("You should give the .MYI file as the filename argument."); @@ -359,7 +359,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), silent= 0; break; case '#': - DBUG_PUSH(argument ? argument : "d:t:o"); + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace"); break; case 'V': print_version(); @@ -665,7 +665,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) /* Display statistics. */ DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " - "Mean total length: %6ld\n", + "Mean total length: %6ld", mrg->min_pack_length, mrg->max_pack_length, (ulong) (mrg->records ? (new_length/mrg->records) : 0))); if (verbose && mrg->records) @@ -681,6 +681,7 @@ static int compress(PACK_MRG_INFO *mrg,char *result_table) { error|=my_close(isam_file->dfile,MYF(MY_WME)); isam_file->dfile= -1; /* Tell maria_close file is closed */ + isam_file->s->bitmap.file= -1; } } @@ -841,32 +842,27 @@ static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees, static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) { int error; - uint length; + uint length, null_bytes; ulong reclength,max_blob_length; byte *record,*pos,*next_pos,*end_pos,*start_pos; ha_rows record_count; - my_bool static_row_size; HUFF_COUNTS *count,*end_count; TREE_ELEMENT *element; + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); DBUG_ENTER("get_statistic"); - reclength=mrg->file[0]->s->base.reclength; + reclength= mrg->file[0]->s->base.reclength; + null_bytes= mrg->file[0]->s->base.null_bytes; record=(byte*) my_alloca(reclength); end_count=huff_counts+mrg->file[0]->s->base.fields; record_count=0; glob_crc=0; max_blob_length=0; /* Check how to calculate checksum */ - static_row_size=1; - for (count=huff_counts ; count < end_count ; count++) - { - if (count->field_type == FIELD_BLOB || - count->field_type == FIELD_VARCHAR) - { - static_row_size=0; - break; - } - } + if (mrg->file[0]->s->data_file_type == STATIC_RECORD) + calc_checksum= _ma_static_checksum; + else + calc_checksum= _ma_checksum; mrg_reset(mrg); while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) @@ -875,13 +871,10 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) if (! error) { /* glob_crc is a checksum over all bytes of all records. */ - if (static_row_size) - glob_crc+=_ma_static_checksum(mrg->file[0],record); - else - glob_crc+=_ma_checksum(mrg->file[0],record); + glob_crc+= (*calc_checksum)(mrg->file[0],record); /* Count the incidence of values separately for every column. */ - for (pos=record,count=huff_counts ; + for (pos=record + null_bytes, count=huff_counts ; count < end_count ; count++, pos=next_pos) @@ -1109,14 +1102,14 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) DBUG_PRINT("info", ("column: %3lu", count - huff_counts + 1)); if (verbose >= 2) - VOID(printf("column: %3lu\n", count - huff_counts + 1)); + VOID(printf("column: %3u\n", count - huff_counts + 1)); if (count->tree_buff) { DBUG_PRINT("info", ("number of distinct values: %lu", (count->tree_pos - count->tree_buff) / count->field_length)); if (verbose >= 2) - VOID(printf("number of distinct values: %lu\n", + VOID(printf("number of distinct values: %u\n", (count->tree_pos - count->tree_buff) / count->field_length)); } @@ -1368,7 +1361,8 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees, DBUG_VOID_RETURN; } - /* Test if we can use space-compression and empty-field-compression */ + +/* Test if we can use space-compression and empty-field-compression */ static int test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records, @@ -2281,7 +2275,7 @@ static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) if (bits > 8 * sizeof(code)) { VOID(fflush(stdout)); - VOID(fprintf(stderr, "error: Huffman code too long: %u/%lu\n", + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", bits, 8 * sizeof(code))); errors++; break; @@ -2410,8 +2404,8 @@ static uint max_bit(register uint value) static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) { int error; - uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length, - intervall,field_length,max_pack_length,pack_blob_length; + uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length; + uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes; my_off_t record_count; char llbuf[32]; ulong length,pack_length; @@ -2429,6 +2423,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) end_count=huff_counts+isam_file->s->base.fields; min_record_length= (uint) ~0; max_record_length=0; + null_bytes= isam_file->s->base.null_bytes; /* Calculate the maximum number of bits required to pack the records. @@ -2439,7 +2434,8 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) Empty blobs and varchar are encoded with a single 1 bit. Other blobs and varchar get a leading 0 bit. */ - for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++) + max_calc_length= null_bytes; + for (i= 0 ; i < isam_file->s->base.fields ; i++) { if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) huff_counts[i].max_zero_fill=0; @@ -2475,8 +2471,16 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length)) break; record_pos= (byte*) file_buffer.pos; - file_buffer.pos+=max_pack_length; - for (start_pos=record, count= huff_counts; count < end_count ; count++) + file_buffer.pos+= max_pack_length; + if (null_bytes) + { + /* Copy null bits 'as is' */ + memcpy(file_buffer.pos, record, null_bytes); + file_buffer.pos+= null_bytes; + } + for (start_pos=record+null_bytes, count= huff_counts; + count < end_count ; + count++) { end_pos=start_pos+(field_length=count->field_length); tree=count->tree; @@ -2738,8 +2742,9 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; pack_length= _ma_save_pack_length(pack_version, record_pos, length); if (pack_blob_length) - pack_length+= _ma_save_pack_length(pack_version, record_pos + pack_length, - tot_blob_length); + pack_length+= _ma_save_pack_length(pack_version, + record_pos + pack_length, + tot_blob_length); DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " "length-bytes: %lu", (ulong) record_count, length, tot_blob_length, pack_length)); @@ -2934,7 +2939,8 @@ static void flush_bits(void) ** functions to handle the joined files ****************************************************************************/ -static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg, + my_off_t new_length, ha_checksum crc) { MARIA_SHARE *share=isam_file->s; @@ -2944,6 +2950,8 @@ static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA; mi_int2store(share->state.header.options,options); + share->state.header.org_data_file_type= share->state.header.data_file_type; + share->state.header.data_file_type= COMPRESSED_RECORD; share->state.state.data_file_length=new_length; share->state.state.del=0; @@ -2962,14 +2970,13 @@ static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length } /* If there are no disabled indexes, keep key_file_length value from - original file so "mariachk -rq" can use this value (this is necessary + original file so "maria_chk -rq" can use this value (this is necessary because index size cannot be easily calculated for fulltext keys) */ maria_clear_all_keys_active(share->state.key_map); for (key=0 ; key < share->base.keys ; key++) share->state.key_root[key]= HA_OFFSET_ERROR; - for (key=0 ; key < share->state.header.max_block_size_index ; key++) - share->state.key_del[key]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; isam_file->state->checksum=crc; /* Save crc here */ share->changed=1; /* Force write of header */ share->state.open_count=0; @@ -3037,21 +3044,18 @@ static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) info->end=info->current+info->count; maria_reset(isam_info); maria_extra(isam_info, HA_EXTRA_CACHE, 0); - filepos=isam_info->s->pack.header_length; + if ((error= maria_scan_init(isam_info))) + return(error); } else - { isam_info= *info->current; - filepos= isam_info->nextpos; - } for (;;) { - isam_info->update&= HA_STATE_CHANGED; - if (!(error=(*isam_info->s->read_rnd)(isam_info,(byte*) buf, - filepos, 1)) || + if (!(error= maria_scan(isam_info, buf)) || error != HA_ERR_END_OF_FILE) return (error); + maria_scan_end(isam_info); maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0); if (info->current+1 == info->end) return(HA_ERR_END_OF_FILE); @@ -3060,6 +3064,8 @@ static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) filepos=isam_info->s->pack.header_length; maria_reset(isam_info); maria_extra(isam_info,HA_EXTRA_CACHE, 0); + if ((error= maria_scan_init(isam_info))) + return(error); } } @@ -3068,11 +3074,13 @@ static int mrg_close(PACK_MRG_INFO *mrg) { uint i; int error=0; + DBUG_ENTER("mrg_close"); + for (i=0 ; i < mrg->count ; i++) error|=maria_close(mrg->file[i]); if (mrg->free_file) my_free((gptr) mrg->file,MYF(0)); - return error; + DBUG_RETURN(error); } diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 22860698a3a..05d4e5d77cf 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -911,6 +911,9 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend) if (*killed_ptr(param)) goto err2; switch (info->s->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_read(¶m->read_cache,(byte*) record, info->s->base.pack_reclength)) @@ -2897,6 +2900,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: for (;;) { @@ -3283,6 +3289,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param) if (sort_param->fix_datafile) { switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, share->base.pack_reclength)) @@ -3395,18 +3404,19 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) if (sort_info->key_block->inited) { - cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, diff_pos); if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) - ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, - sort_info->key_block->lastkey, + (uchar*) sort_info-> + key_block->lastkey, (uchar*)a); } sort_param->unique[diff_pos[0]-1]++; @@ -3429,8 +3439,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) llstr(sort_info->info->lastpos,llbuff), llstr(get_record_for_key(sort_info->info, sort_param->keyinfo, - sort_info->key_block-> - lastkey), + (uchar*) sort_info-> + key_block->lastkey), llbuff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; if (sort_info->param->testflag & T_VERBOSE) @@ -3461,19 +3471,19 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) val_len=share->ft2_keyinfo.keylength; get_key_full_length_rdonly(val_off, ft_buf->lastkey); - to=ft_buf->lastkey+val_off; + to= (uchar*) ft_buf->lastkey+val_off; if (ft_buf->buf) { /* flushing first-level tree */ - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); for (from=to+val_len; - !error && from < ft_buf->buf; + !error && from < (uchar*) ft_buf->buf; from+= val_len) { memcpy(to, from, val_len); - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); } return error; @@ -3482,8 +3492,8 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) error=flush_pending_blocks(sort_param); /* updating lastkey with second-level tree info */ ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count); - _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN, - share->state.key_root[sort_param->key]); + _mi_dpointer(sort_info->info, (uchar*) ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); /* restoring first level tree data in sort_info/sort_param */ sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; sort_param->keyinfo=share->keyinfo+sort_param->key; @@ -3491,7 +3501,7 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) /* writing lastkey in first-level tree */ return error ? error : sort_insert_key(sort_param,sort_info->key_block, - ft_buf->lastkey,HA_OFFSET_ERROR); + (uchar*) ft_buf->lastkey,HA_OFFSET_ERROR); } static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) @@ -3530,7 +3540,7 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) if (ha_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0, 0)==0) + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) { if (!ft_buf->buf) /* store in second-level tree */ { @@ -3546,16 +3556,16 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) return 0; /* converting to two-level tree */ - p=ft_buf->lastkey+val_off; + p= (uchar*) ft_buf->lastkey+val_off; while (key_block->inited) key_block++; sort_info->key_block=key_block; sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; - ft_buf->count=(ft_buf->buf - p)/val_len; + ft_buf->count=((uchar*) ft_buf->buf - p)/val_len; /* flushing buffer to second-level tree */ - for (error=0; !error && p < ft_buf->buf; p+= val_len) + for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len) error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); ft_buf->buf=0; return error; @@ -3607,9 +3617,9 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_insert_key"); - anc_buff=key_block->buff; + anc_buff= (uchar*) key_block->buff; info=sort_info->info; - lastkey=key_block->lastkey; + lastkey= (uchar*) key_block->lastkey; nod_flag= (key_block == sort_info->key_block ? 0 : info->s->base.key_reflength); @@ -3622,7 +3632,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_RETURN(1); } a_length=2+nod_flag; - key_block->end_pos=anc_buff+2; + key_block->end_pos= (char*) anc_buff+2; lastkey=0; /* No previous key in block */ } else @@ -3630,18 +3640,18 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, /* Save pointer to previous block */ if (nod_flag) - _mi_kpointer(info,key_block->end_pos,prev_block); + _mi_kpointer(info,(uchar*) key_block->end_pos,prev_block); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0,lastkey,lastkey,key, &s_temp); - (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + (*keyinfo->store_key)(keyinfo, (uchar*) key_block->end_pos+nod_flag,&s_temp); a_length+=t_length; mi_putint(anc_buff,a_length,nod_flag); key_block->end_pos+=t_length; if (a_length <= keyinfo->block_length) { - VOID(_mi_move_key(keyinfo,key_block->lastkey,key)); + VOID(_mi_move_key(keyinfo,(uchar*) key_block->lastkey,key)); key_block->last_length=a_length-t_length; DBUG_RETURN(0); } @@ -3666,7 +3676,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_DUMP("buff",(byte*) anc_buff,mi_getint(anc_buff)); /* Write separator-key to block in next level */ - if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + if (sort_insert_key(sort_param,key_block+1,(uchar*) key_block->lastkey, + filepos)) DBUG_RETURN(1); /* clear old block and write new key in it */ @@ -3752,7 +3763,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) key_block->inited=0; length=mi_getint(key_block->buff); if (nod_flag) - _mi_kpointer(info,key_block->end_pos,filepos); + _mi_kpointer(info,(uchar*) key_block->end_pos,filepos); key_file_length=info->state->key_file_length; bzero((byte*) key_block->buff+length, keyinfo->block_length-length); if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) @@ -3762,7 +3773,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) if (key_file_length == info->state->key_file_length) { if (_mi_write_keypage(info, keyinfo, filepos, - DFLT_INIT_HITS, key_block->buff)) + DFLT_INIT_HITS, (uchar*) key_block->buff)) DBUG_RETURN(1); } else if (my_pwrite(info->s->kfile,(byte*) key_block->buff, @@ -3794,7 +3805,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, for (i=0 ; i < blocks ; i++) { block[i].inited=0; - block[i].buff=(uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i; + block[i].buff=(byte*) (block+blocks)+(buffer_length+IO_SIZE)*i; } DBUG_RETURN(block); } /* alloc_key_blocks */ diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index ad824bef009..cbd431cc222 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -41,7 +41,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, File dfile,file; int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; myf create_flag; - uint fields,length,max_key_length,packed,pointer,real_length_diff, + uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff, key_length,info_length,key_segs,options,min_key_length_skip, base_pos,long_varchar_count,varchar_length, max_key_block_length,unique_key_parts,fulltext_keys,offset; @@ -189,11 +189,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) options|= HA_OPTION_RELIES_ON_SQL_LAYER; - packed=(packed+7)/8; + pack_bytes= (packed+7)/8; if (pack_reclength != INT_MAX32) pack_reclength+= reclength+packed + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); - min_pack_length+=packed; + min_pack_length+= pack_bytes; if (!ci->data_file_length && ci->max_rows) { @@ -547,9 +547,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); share.base.max_pack_length=pack_reclength; share.base.min_pack_length=min_pack_length; - share.base.pack_bits=packed; + share.base.pack_bits= pack_bytes; share.base.fields=fields; - share.base.pack_fields=packed; + share.base.pack_fields= packed; #ifdef USE_RAID share.base.raid_type=ci->raid_type; share.base.raid_chunks=ci->raid_chunks; diff --git a/storage/myisam/mi_rsamepos.c b/storage/myisam/mi_rsamepos.c index c4bd5fa16fa..d2dba64b0fd 100644 --- a/storage/myisam/mi_rsamepos.c +++ b/storage/myisam/mi_rsamepos.c @@ -33,7 +33,8 @@ int mi_rsame_with_pos(MI_INFO *info, byte *record, int inx, my_off_t filepos) DBUG_ENTER("mi_rsame_with_pos"); DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); - if (inx < -1 || inx >= 0 && ! mi_is_key_active(info->s->state.key_map, inx)) + if (inx < -1 || + (inx >= 0 && ! mi_is_key_active(info->s->state.key_map, inx))) { DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); } diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 357128b7a40..cf603f85630 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -603,7 +603,7 @@ int main(int argc, char *argv[]) if (mi_rsame(file,read_record2,(int) i)) goto err; if (bcmp(read_record,read_record2,reclength) != 0) { - printf("is_rsame didn't find same record\n"); + printf("mi_rsame didn't find same record\n"); goto end; } } diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index 0369be5db9d..50ba22e8beb 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -802,7 +802,7 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, register uint count; uint16 length_of_key = 0; uint idx; - uchar *buffp; + byte *buffp; if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) { @@ -889,7 +889,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, for (buffpek= Fb ; buffpek <= Tb ; buffpek++) { count+= buffpek->count; - buffpek->base= strpos; + buffpek->base= (byte*) strpos; buffpek->max_keys=maxcount; strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, sort_length)); @@ -927,7 +927,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, { if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) { - uchar *base=buffpek->base; + byte *base= buffpek->base; uint max_keys=buffpek->max_keys; VOID(queue_remove(&queue,0)); @@ -959,7 +959,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, } } buffpek=(BUFFPEK*) queue_top(&queue); - buffpek->base=(uchar *) sort_keys; + buffpek->base= (byte*) sort_keys; buffpek->max_keys=keys; do { @@ -974,7 +974,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key; + strpos= (uchar*) buffpek->key; for (end=strpos+buffpek->mem_count*sort_length; strpos != end ; strpos+=sort_length) diff --git a/support-files/magic b/support-files/magic index 9844142ba93..c8ca5875211 100644 --- a/support-files/magic +++ b/support-files/magic @@ -4,9 +4,13 @@ # 0 beshort 0xfe01 MySQL table definition file >2 byte x Version %d -0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file +0 belong&0xffffff00 0xfefe0700 MySQL MISAM index file >3 byte x Version %d -0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file +0 belong&0xffffff00 0xfefe0800 MySQL MISAM compressed data file +>3 byte x Version %d +0 belong&0xffffff00 0xfefe0900 MySQL Maria index file +>3 byte x Version %d +0 belong&0xffffff00 0xfefe0A00 MySQL Maria compressed data file >3 byte x Version %d 0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file >3 byte x Version %d From 7412f0fa0cd498f06fe04966a6f6161e8b32d0a2 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 26 Jan 2007 13:32:02 +0200 Subject: [PATCH 86/95] After merge fixes Removed compiler warnings Fixed clashing function name in maria Disable maria tests from MySQL level for now BitKeeper/deleted/.del-ha_maria.cc: Rename: libmysqld/ha_maria.cc -> BitKeeper/deleted/.del-ha_maria.cc BitKeeper/etc/ignore: added libmysqld/ha_maria.cc --- added storage/maria/unittest/maria_control unittest/maria_control --- added *.Tpo --- added unittest/page_cache_test_file_1 --- added unittest/pagecache_debug.log --- added unittest/mysys/mf_pagecache_consist_1k-t-big unittest/mysys/mf_pagecache_consist_1kHC-t-big unittest/mysys/mf_pagecache_consist_1kRD-t-big unittest/mysys/mf_pagecache_consist_1kWR-t-big unittest/mysys/mf_pagecache_consist_64k-t-big unittest/mysys/mf_pagecache_consist_64kHC-t-big unittest/mysys/mf_pagecache_consist_64kRD-t-big unittest/mysys/mf_pagecache_consist_64kWR-t-big --- added unittest/mysys/mf_pagecache_single_64k-t-big Makefile.am: Don't run 'test-unit' by default (takes too long time) client/mysqldump.c: Fixed compiler warning include/lf.h: Remove compiler warnings about not used require_pins constant include/pagecache.h: LSN should be of type ulonglong (This fixes some compiler warnings) mysql-test/r/events_logs_tests.result: Make test predictable mysql-test/r/view.result: Make test results predictable mysql-test/t/disabled.def: Disable maria tests for a while mysql-test/t/events_logs_tests.test: Make test predictable mysql-test/t/view.test: Make test results predictable mysys/lf_alloc-pin.c: #warning ->QQ mysys/lf_hash.c: #warning ->QQ Removed compiler warnings mysys/mf_pagecache.c: Removed compiler warnings mysys/my_rename.c: Removed compiler warnings plugin/daemon_example/daemon_example.c: Remove compiler warning sql/ha_ndbcluster.cc: Remove compiler warning sql/udf_example.c: Remove compiler warning storage/maria/lockman.c: Changed #warnings to QQ comment Removed compiler warnings storage/maria/ma_blockrec.c: Removed compiler warnings storage/maria/ma_check.c: After merge fixes storage/maria/ma_key.c: After merge fixes storage/maria/ma_packrec.c: After merge fixes storage/maria/ma_rkey.c: After merge fixes storage/maria/ma_sort.c: After merge fixes storage/maria/ma_sp_defs.h: Rename clashing function name storage/maria/ma_sp_key.c: Rename clashing function name storage/maria/ma_test_all.res: New test results storage/maria/ma_unique.c: Fixed compiler warning storage/maria/tablockman.c: #warning -> QQ storage/maria/tablockman.h: #warning -> QQ storage/maria/trnman.c: #warning -> QQ storage/maria/unittest/lockman2-t.c: Removed compiler warnings storage/maria/unittest/ma_control_file-t.c: Removed warning for 'maria_control' file not found storage/maria/unittest/trnman-t.c: Removed compiler warnings storage/ndb/src/mgmapi/mgmapi.cpp: Remove compiler warnings unittest/mysys/mf_pagecache_consist.c: Removed compiler warnings unittest/mysys/my_atomic-t.c: Removed compiler warnings --- .bzrignore | 15 ++++ Makefile.am | 4 +- client/mysqldump.c | 4 +- include/lf.h | 3 +- include/pagecache.h | 2 +- mysql-test/r/events_logs_tests.result | 2 +- mysql-test/r/view.result | 11 +++ mysql-test/t/disabled.def | 2 + mysql-test/t/events_logs_tests.test | 2 +- mysql-test/t/view.test | 18 +++- mysys/lf_alloc-pin.c | 2 +- mysys/lf_hash.c | 13 +-- mysys/mf_pagecache.c | 98 +++++++++++----------- mysys/my_rename.c | 3 +- plugin/daemon_example/daemon_example.c | 4 +- sql/ha_ndbcluster.cc | 4 +- sql/udf_example.c | 2 +- storage/maria/lockman.c | 27 +++--- storage/maria/ma_blockrec.c | 11 +-- storage/maria/ma_check.c | 36 +++++--- storage/maria/ma_key.c | 6 +- storage/maria/ma_packrec.c | 2 +- storage/maria/ma_rkey.c | 3 +- storage/maria/ma_sort.c | 3 +- storage/maria/ma_sp_defs.h | 4 +- storage/maria/ma_sp_key.c | 4 +- storage/maria/ma_test_all.res | 85 ++++++++++--------- storage/maria/ma_unique.c | 2 +- storage/maria/tablockman.c | 6 +- storage/maria/tablockman.h | 2 +- storage/maria/trnman.c | 2 +- storage/maria/unittest/lockman2-t.c | 21 +++-- storage/maria/unittest/ma_control_file-t.c | 12 +-- storage/maria/unittest/trnman-t.c | 5 +- storage/ndb/src/mgmapi/mgmapi.cpp | 12 +-- unittest/mysys/mf_pagecache_consist.c | 2 +- unittest/mysys/my_atomic-t.c | 2 +- 37 files changed, 258 insertions(+), 178 deletions(-) diff --git a/.bzrignore b/.bzrignore index bd7941f77ab..7dd012950d8 100644 --- a/.bzrignore +++ b/.bzrignore @@ -2946,3 +2946,18 @@ win/vs71cache.txt win/vs8cache.txt zlib/*.ds? zlib/*.vcproj +libmysqld/ha_maria.cc +storage/maria/unittest/maria_control +unittest/maria_control +*.Tpo +unittest/page_cache_test_file_1 +unittest/pagecache_debug.log +unittest/mysys/mf_pagecache_consist_1k-t-big +unittest/mysys/mf_pagecache_consist_1kHC-t-big +unittest/mysys/mf_pagecache_consist_1kRD-t-big +unittest/mysys/mf_pagecache_consist_1kWR-t-big +unittest/mysys/mf_pagecache_consist_64k-t-big +unittest/mysys/mf_pagecache_consist_64kHC-t-big +unittest/mysys/mf_pagecache_consist_64kRD-t-big +unittest/mysys/mf_pagecache_consist_64kWR-t-big +unittest/mysys/mf_pagecache_single_64k-t-big diff --git a/Makefile.am b/Makefile.am index 81656d67f74..ed2f8d99414 100644 --- a/Makefile.am +++ b/Makefile.am @@ -137,9 +137,9 @@ test-binlog-statement: cd mysql-test ; \ ./mysql-test-run.pl $(force) --mysqld=--binlog-format=statement -test: test-unit test-ns test-pr +test: test-ns test-pr -test-full: test test-nr test-ps +test-full: test-unit test test-nr test-ps test-force: $(MAKE) force=--force test diff --git a/client/mysqldump.c b/client/mysqldump.c index 5d87701fe10..0d6b5678e52 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -2877,7 +2877,7 @@ static int dump_tablespaces_for_tables(char *db, char **table_names, int tables) dynstr_trunc(&where, 1); dynstr_append(&where,"))"); - DBUG_PRINT("info",("Dump TS for Tables where: %s",where)); + DBUG_PRINT("info",("Dump TS for Tables where: %s",where.str)); r= dump_tablespaces(where.str); dynstr_free(&where); return r; @@ -2907,7 +2907,7 @@ static int dump_tablespaces_for_databases(char** databases) dynstr_trunc(&where, 1); dynstr_append(&where,"))"); - DBUG_PRINT("info",("Dump TS for DBs where: %s",where)); + DBUG_PRINT("info",("Dump TS for DBs where: %s",where.str)); r= dump_tablespaces(where.str); dynstr_free(&where); return r; diff --git a/include/lf.h b/include/lf.h index c161bd5044d..39a47e6568a 100644 --- a/include/lf.h +++ b/include/lf.h @@ -135,8 +135,9 @@ typedef struct { */ #if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) #define LF_REQUIRE_PINS(N) \ - static const char require_pins[LF_PINBOX_PINS-N]; \ + static const char require_pins[LF_PINBOX_PINS-N] __attribute__ ((unused)); \ static const int LF_NUM_PINS_IN_THIS_FILE= N + #define _lf_pin(PINS, PIN, ADDR) \ ( \ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \ diff --git a/include/pagecache.h b/include/pagecache.h index 418e0203f64..b698a36a5e4 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -73,7 +73,7 @@ typedef void *PAGECACHE_PAGE_LINK; /* TODO: move to loghandler emulator */ typedef void LOG_HANDLER; -typedef void *LSN; +typedef ulonglong LSN; /* file descriptor for Maria */ typedef struct st_pagecache_file diff --git a/mysql-test/r/events_logs_tests.result b/mysql-test/r/events_logs_tests.result index 335ca848387..b067b2011e1 100644 --- a/mysql-test/r/events_logs_tests.result +++ b/mysql-test/r/events_logs_tests.result @@ -83,7 +83,7 @@ slo_val val 4 0 1 0 "Check slow log. Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10" -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; user_host query_time db sql_text USER_HOST SLEEPVAL events_test INSERT INTO slow_event_test SELECT @@long_query_time, SLEEP(2) DROP EVENT long_event2; diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result index 4bc122cc97b..eb090658560 100644 --- a/mysql-test/r/view.result +++ b/mysql-test/r/view.result @@ -2684,10 +2684,12 @@ FROM t1 HAVING Age < 75; SHOW CREATE VIEW v1; View Create View v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select (year(now()) - year(`t1`.`DOB`)) AS `Age` from `t1` having (`Age` < 75) +set timestamp=1136066400; SELECT (year(now())-year(DOB)) AS Age FROM t1 HAVING Age < 75; Age 42 38 +set timestamp=1136066400; SELECT * FROM v1; Age 42 @@ -3014,6 +3016,15 @@ i j 6 3 DROP VIEW v1, v2; DROP TABLE t1; +DROP VIEW IF EXISTS v1; +CREATE VIEW v1 AS SELECT 'The\ZEnd'; +SELECT * FROM v1; +TheEnd +ThezEnd +SHOW CREATE VIEW v1; +View Create View +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select _latin1'ThezEnd' AS `TheEnd` +DROP VIEW v1; End of 5.0 tests. DROP DATABASE IF EXISTS `d-1`; CREATE DATABASE `d-1`; diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def index c36ac1bd168..3b063841512 100644 --- a/mysql-test/t/disabled.def +++ b/mysql-test/t/disabled.def @@ -38,3 +38,5 @@ ndb_autodiscover3 : bug#21806 flush2 : Bug#24805 Pushbuild can't handle test with --disable-log-bin +maria : Until maria is fully functional +ps_maria : Until maria is fully functional diff --git a/mysql-test/t/events_logs_tests.test b/mysql-test/t/events_logs_tests.test index aee16595ef9..001004386e4 100644 --- a/mysql-test/t/events_logs_tests.test +++ b/mysql-test/t/events_logs_tests.test @@ -91,7 +91,7 @@ CREATE EVENT long_event2 ON SCHEDULE EVERY 1 MINUTE DO INSERT INTO slow_event_te SELECT * FROM slow_event_test; --echo "Check slow log. Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10" --replace_column 1 USER_HOST 2 SLEEPVAL -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; DROP EVENT long_event2; --echo "Make it quite long" SET SESSION long_query_time=300; diff --git a/mysql-test/t/view.test b/mysql-test/t/view.test index 7175db1db4e..3c5308a935c 100644 --- a/mysql-test/t/view.test +++ b/mysql-test/t/view.test @@ -2553,7 +2553,9 @@ CREATE VIEW v1 AS FROM t1 HAVING Age < 75; SHOW CREATE VIEW v1; +set timestamp=1136066400; SELECT (year(now())-year(DOB)) AS Age FROM t1 HAVING Age < 75; +set timestamp=1136066400; SELECT * FROM v1; DROP VIEW v1; @@ -2906,6 +2908,7 @@ DROP FUNCTION f1; DROP VIEW v1; DROP TABLE t1; +# # Bug #16813 (WITH CHECK OPTION doesn't work with UPDATE) # CREATE TABLE t1(id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, val INT UNSIGNED NOT NULL); @@ -2919,7 +2922,6 @@ UPDATE v1 SET val=6 WHERE id=2; DROP VIEW v1; DROP TABLE t1; - # # BUG#22584: last_insert_id not updated after inserting a record # through a updatable view @@ -2957,6 +2959,20 @@ SELECT * FROM t1; DROP VIEW v1, v2; DROP TABLE t1; +# +# BUG#24293: '\Z' token is not handled correctly in views +# + +--disable_warnings +DROP VIEW IF EXISTS v1; +--enable_warnings + +CREATE VIEW v1 AS SELECT 'The\ZEnd'; +SELECT * FROM v1; + +SHOW CREATE VIEW v1; + +DROP VIEW v1; --echo End of 5.0 tests. diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index d0fa29ddaaf..e964553a64c 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,4 +1,4 @@ -#warning TODO multi-pinbox +/* QQ: TODO multi-pinbox */ /* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index b2ad7a93ace..fb2fb88492f 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -86,7 +86,7 @@ retry: if (!cursor->curr) return 0; do { -#warning XXX or goto retry ? + /* QQ: XXX or goto retry ? */ link= cursor->curr->link; cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); @@ -105,7 +105,8 @@ retry: { int r= 1; if (cur_hashnr > hashnr || - (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) + (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key, + keylen)) >= 0) return !r; } cursor->prev= &(cursor->curr->link); @@ -243,7 +244,8 @@ static inline const byte* hash_key(const LF_HASH *hash, static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) { ulong nr1= 1, nr2= 4; - hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2); + hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, + &nr1, &nr2); return nr1 & INT_MAX32; } @@ -375,7 +377,7 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) return found ? found+1 : 0; } -static char *dummy_key= ""; +static const char *dummy_key= ""; static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) @@ -387,7 +389,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); dummy->hashnr= my_reverse_bits(bucket); - dummy->key= dummy_key; + dummy->key= (char*) dummy_key; dummy->keylen= 0; if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { @@ -396,4 +398,3 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, } my_atomic_casptr((void **)node, (void **)&tmp, dummy); } - diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 5f4e8d1a97d..0116dfabfa1 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -47,7 +47,6 @@ #include #include - /* Some compilation flags have been added specifically for this module to control the following: @@ -429,7 +428,6 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ -static int flush_all_key_blocks(PAGECACHE *pagecache); #ifdef THREAD static void link_into_queue(PAGECACHE_WQUEUE *wqueue, struct st_my_thread_var *thread); @@ -793,6 +791,40 @@ err: } +/* + Flush all blocks in the key cache to disk +*/ + +#ifdef NOT_USED +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} +#endif /* NOT_USED */ + /* Resize a key cache @@ -827,7 +859,7 @@ err: resizing, due to the page locking specific to this page cache. So we disable it for now. */ -#if 0 /* keep disabled until code is fixed see above !! */ +#if NOT_USED /* keep disabled until code is fixed see above !! */ int resize_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold) @@ -1383,7 +1415,7 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests, pagecache->blocks_available)); BLOCK_INFO(block); - KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0); + KEYCACHE_DBUG_ASSERT((int) pagecache->blocks_available >= 0); #endif } @@ -2511,7 +2543,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page > 0); + DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2675,7 +2707,7 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page > 0); + DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -3251,7 +3283,9 @@ restart: #ifndef DBUG_OFF int rc= #endif -#warning we are doing an unlock here, so need to give the page its rec_lsn! + /* + QQ: We are doing an unlock here, so need to give the page its rec_lsn + */ pagecache_make_lock_and_pin(pagecache, block, write_lock_change_table[lock].unlock_lock, write_pin_change_table[pin].unlock_pin); @@ -3612,11 +3646,14 @@ restart: else { /* Link the block into a list of blocks 'in switch' */ -#warning this unlink_changed() is a serious problem for Maria's Checkpoint: it \ -removes a page from the list of dirty pages, while it's still dirty. A \ - solution is to abandon first_in_switch, just wait for this page to be \ - flushed by somebody else, and loop. TODO: check all places where we remove a \ - page from the list of dirty pages + /* QQ: + #warning this unlink_changed() is a serious problem for + Maria's Checkpoint: it removes a page from the list of dirty + pages, while it's still dirty. A solution is to abandon + first_in_switch, just wait for this page to be + flushed by somebody else, and loop. TODO: check all places + where we remove a page from the list of dirty pages + */ unlink_changed(block); link_changed(block, &first_in_switch); } @@ -3727,39 +3764,6 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, } -/* - Flush all blocks in the key cache to disk -*/ - -static int flush_all_key_blocks(PAGECACHE *pagecache) -{ -#if defined(PAGECACHE_DEBUG) - uint cnt=0; -#endif - while (pagecache->blocks_changed > 0) - { - PAGECACHE_BLOCK_LINK *block; - for (block= pagecache->used_last->next_used ; ; block=block->next_used) - { - if (block->hash_link) - { -#if defined(PAGECACHE_DEBUG) - cnt++; - KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); -#endif - if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, - FLUSH_RELEASE)) - return 1; - break; - } - if (block == pagecache->used_last) - break; - } - } - return 0; -} - - /* Reset the counters of a key cache. @@ -3876,7 +3880,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, 1) and 2) are critical problems. TODO: fix this when Monty has explained how he writes BLOB pages. */ - if (0 == block->rec_lsn) + if (block->rec_lsn == 0) { DBUG_ASSERT(0); goto err; @@ -3908,7 +3912,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr+= 4; int4store(ptr, block->hash_link->pageno); ptr+= 4; - int8store(ptr, (ulonglong)block->rec_lsn); + int8store(ptr, (ulonglong) block->rec_lsn); ptr+= 8; set_if_bigger(*max_lsn, block->rec_lsn); } diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 8c2a354324b..c3870db177d 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -17,8 +17,9 @@ #include "mysys_priv.h" #include #include "mysys_err.h" - +#include "m_string.h" #undef my_rename + /* On unix rename deletes to file if it exists */ int my_rename(const char *from, const char *to, myf MyFlags) diff --git a/plugin/daemon_example/daemon_example.c b/plugin/daemon_example/daemon_example.c index d302aec6515..786ff199c93 100644 --- a/plugin/daemon_example/daemon_example.c +++ b/plugin/daemon_example/daemon_example.c @@ -40,7 +40,7 @@ 1 failure (cannot happen) */ -static int daemon_example_plugin_init(void *p) +static int daemon_example_plugin_init(void *p __attribute__ ((unused))) { return(0); } @@ -59,7 +59,7 @@ static int daemon_example_plugin_init(void *p) */ -static int daemon_example_plugin_deinit(void *p) +static int daemon_example_plugin_deinit(void *p __attribute__ ((unused))) { return(0); } diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 0362b8bf215..0c3a79baacc 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -7297,9 +7297,9 @@ static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, static void print_share(const char* where, NDB_SHARE* share) { fprintf(DBUG_FILE, - "%s %s.%s: use_count: %u, commit_count: %llu\n", + "%s %s.%s: use_count: %u, commit_count: %lu\n", where, share->db, share->table_name, share->use_count, - share->commit_count); + (ulong) share->commit_count); fprintf(DBUG_FILE, " - key: %s, key_length: %d\n", share->key, share->key_length); diff --git a/sql/udf_example.c b/sql/udf_example.c index bbab47e253d..a71cc4b44f2 100644 --- a/sql/udf_example.c +++ b/sql/udf_example.c @@ -1087,7 +1087,7 @@ my_bool is_const_init(UDF_INIT *initid, UDF_ARGS *args, char *message) strmov(message, "IS_CONST accepts only one argument"); return 1; } - initid->ptr= (char*)((args->args[0] != NULL) ? 1 : 0); + initid->ptr= (char*)(size_t)((args->args[0] != NULL) ? 1 : 0); return 0; } diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c index 7672fadb068..fdacda84875 100644 --- a/storage/maria/lockman.c +++ b/storage/maria/lockman.c @@ -1,6 +1,7 @@ -#warning TODO - allocate everything from dynarrays !!! (benchmark) -#warning TODO instant duration locks -#warning automatically place S instead of LS if possible +/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */ +/* QQ: TODO instant duration locks */ +/* QQ: #warning automatically place S instead of LS if possible */ + /* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify @@ -218,7 +219,7 @@ typedef struct lockman_lock { struct lockman_lock *lonext; intptr volatile link; uint32 hashnr; -#warning TODO - remove hashnr from LOCK + /* QQ: TODO - remove hashnr from LOCK */ uint16 loid; uchar lock; /* sizeof(uchar) <= sizeof(enum) */ uchar flags; @@ -428,9 +429,11 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, } if (res & LOCK_UPGRADE) cursor.upgrade_from->flags|= IGNORE_ME; -#warning is this OK ? if a reader has already read upgrade_from, \ - it may find it conflicting with node :( -#warning another bug - see the last test from test_lockman_simple() + /* + QQ: is this OK ? if a reader has already read upgrade_from, + it may find it conflicting with node :( + - see the last test from test_lockman_simple() + */ } } while (res == REPEAT_ONCE_MORE); @@ -673,7 +676,7 @@ enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER, if there're other active LOCK_OWNERs. */ -#warning race condition here + /* QQ: race condition here */ pthread_mutex_lock(wait_for_lo->mutex); if (DELETED(blocker->link)) { @@ -749,7 +752,7 @@ int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) } #ifdef MY_LF_EXTRA_DEBUG -static char *lock2str[]= +static const char *lock2str[]= { "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; /* NOTE @@ -764,8 +767,9 @@ void print_lockhash(LOCKMAN *lm) intptr next= el->link; if (el->hashnr & 1) { - printf("0x%08x { resource %llu, loid %u, lock %s", - el->hashnr, el->resource, el->loid, lock2str[el->lock]); + printf("0x%08lx { resource %lu, loid %u, lock %s", + (long) el->hashnr, (ulong) el->resource, el->loid, + lock2str[el->lock]); if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); if (el->flags & UPGRADED) printf(" UPGRADED"); if (el->flags & ACTIVE) printf(" ACTIVE"); @@ -781,4 +785,3 @@ void print_lockhash(LOCKMAN *lm) } } #endif - diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 6eac151d76e..f1345b2c2f3 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -967,7 +967,8 @@ static my_bool write_full_pages(MARIA_HA *info, my_off_t position; DBUG_ENTER("write_full_pages"); DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu", - length, (ulong) block->page, block->page_count)); + (ulong) length, (ulong) block->page, + (ulong) block->page_count)); info->keybuff_used= 1; page= block->page; @@ -988,7 +989,7 @@ static my_bool write_full_pages(MARIA_HA *info, page= block->page; page_count= block->page_count - 1; DBUG_PRINT("info", ("page: %lu page_count: %lu", - (ulong) block->page, block->page_count)); + (ulong) block->page, (ulong) block->page_count)); position= (page + page_count + 1) * block_size; if (info->state->data_file_length < position) @@ -2387,18 +2388,18 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, blob_buffer+= blob_length; break; } -#ifdef EXTRA_DEBUG default: +#ifdef EXTRA_DEBUG DBUG_ASSERT(0); /* purecov: deadcode */ - goto err; #endif + goto err; } continue; } if (row_extents) { - DBUG_PRINT("info", ("Row read: page_count: %lu extent_count: %lu", + DBUG_PRINT("info", ("Row read: page_count: %u extent_count: %u", extent.page_count, extent.extent_count)); *extent.tail_positions= 0; /* End marker */ if (extent.page_count) diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 682ce276a5b..ccce19de994 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -1078,6 +1078,10 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, char llbuff[22],llbuff2[22],llbuff3[22]; DBUG_ENTER("check_dynamic_record"); + LINT_INIT(left_length); + LINT_INIT(start_recpos); + LINT_INIT(to); + pos= 0; while (pos < info->state->data_file_length) { @@ -1096,7 +1100,8 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, (flag ? 0 : READING_NEXT) | READING_HEADER)) { _ma_check_print_error(param, - "got error: %d when reading datafile at position: %s", + "got error: %d when reading datafile at " + "position: %s", my_errno, llstr(start_block, llbuff)); DBUG_RETURN(1); } @@ -1309,7 +1314,8 @@ static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, start_recpos= pos; param->splits++; VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info, - &info->rec_buff, -1, start_recpos)); + &info->rec_buff, &info->rec_buff_size, -1, + start_recpos)); pos=block_info.filepos+block_info.rec_len; if (block_info.rec_len < (uint) info->s->min_pack_length || block_info.rec_len > (uint) info->s->max_pack_length) @@ -1727,7 +1733,7 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) param->used= param->link_used= param->splits= param->del_length= 0; param->tmp_record_checksum= param->glob_crc= 0; param->err_count= 0; - LINT_INIT(left_length); LINT_INIT(start_recpos); LINT_INIT(to); + error= 0; param->empty= info->s->pack.header_length; @@ -2206,7 +2212,7 @@ static int writekeys(MARIA_SORT_PARAM *sort_param) } /* Remove checksum that was added to glob_crc in sort_get_next_record */ if (sort_param->calc_checksum) - sort_param->glob_crc-= info->cur_row.checksum; + sort_param->sort_info->param->glob_crc-= info->cur_row.checksum; DBUG_PRINT("error",("errno: %d",my_errno)); DBUG_RETURN(-1); } /* writekeys */ @@ -2317,8 +2323,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) param->temp_filename); DBUG_RETURN(-1); } - if (new_header_length && - maria_filecopy(param, new_file,share->kfile,0L, + if (maria_filecopy(param, new_file,share->kfile,0L, (ulong) share->base.keystart, "headerblock")) goto err; @@ -3070,8 +3075,9 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, param->temp_filename); goto err; } - if (maria_filecopy(param, new_file,info->dfile,0L,new_header_length, - "datafile-header")) + if (new_header_length && + maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) goto err; if (param->testflag & T_UNPACK) restore_data_file_type(share); @@ -3814,16 +3820,18 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (left_length < block_info.data_len || ! block_info.data_len) { _ma_check_print_info(param, - "Found block with too small length at %s; Skipped", - llstr(sort_param->start_recpos,llbuff)); + "Found block with too small length at %s; " + "Skipped", + llstr(sort_param->start_recpos,llbuff)); goto try_next; } if (block_info.filepos + block_info.data_len > sort_param->read_cache.end_of_file) { _ma_check_print_info(param, - "Found block that points outside data file at %s", - llstr(sort_param->start_recpos,llbuff)); + "Found block that points outside data file " + "at %s", + llstr(sort_param->start_recpos,llbuff)); goto try_next; } /* @@ -3923,7 +3931,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } sort_param->start_recpos=sort_param->pos; if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info, - &sort_param->rec_buff, -1, sort_param->pos)) + &sort_param->rec_buff, + &sort_param->rec_buff_size, -1, + sort_param->pos)) DBUG_RETURN(-1); if (!block_info.rec_len && sort_param->pos + MEMMAP_EXTRA_MARGIN == diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index deab2b9c983..036fd305c4d 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -64,7 +64,7 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key, TODO: nulls processing */ #ifdef HAVE_SPATIAL - DBUG_RETURN(sp_make_key(info,keynr, key,record,filepos)); + DBUG_RETURN(_ma_sp_make_key(info,keynr, key,record,filepos)); #else DBUG_ASSERT(0); /* maria_open should check that this never happens*/ #endif @@ -113,10 +113,10 @@ uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key, } else { - byte *end= pos + length; + const byte *end= pos + length; while (pos < end && pos[0] == ' ') pos++; - length=(uint) (end-pos); + length= (uint) (end-pos); } FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index 6e481c0bc6d..7c875e7b91d 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -1288,7 +1288,7 @@ _ma_mempack_get_block_info(MARIA_HA *maria, header+= read_pack_length((uint) maria->s->pack.version, header, &info->blob_len); /* _ma_alloc_rec_buff sets my_errno on error */ - if (_ma_alloc_buffer(rec_buff_p, rec_buff_size, + if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p, info->blob_len + maria->s->base.extra_rec_buff_size)) return 0; /* not enough memory */ bit_buff->blob_pos= (uchar*) *rec_buff_p; diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index 17eae760a3a..ad27d3c286c 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -123,7 +123,8 @@ int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, value. */ if (search_flag == HA_READ_KEY_EXACT && - ha_key_cmp(keyinfo->seg, key_buff, info->lastkey, use_key_length, + ha_key_cmp(keyinfo->seg, (uchar*) key_buff, + (uchar*) info->lastkey, use_key_length, SEARCH_FIND, not_used)) { my_errno= HA_ERR_KEY_NOT_FOUND; diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c index b63f99379ea..9134645f847 100644 --- a/storage/maria/ma_sort.c +++ b/storage/maria/ma_sort.c @@ -534,8 +534,7 @@ int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) (ulonglong) info->state->records); } my_free((gptr) sinfo->sort_keys,MYF(0)); - my_free(_ma_get_rec_buff_ptr(info, sinfo->rec_buff), - MYF(MY_ALLOW_ZERO_PTR)); + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); sinfo->sort_keys=0; } diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h index 8b9dd204ded..6aac741bb2c 100644 --- a/storage/maria/ma_sp_defs.h +++ b/storage/maria/ma_sp_defs.h @@ -41,8 +41,8 @@ enum wkbByteOrder wkbNDR = 1 /* Little Endian */ }; -uint sp_make_key(register MARIA_HA *info, uint keynr, byte *key, - const byte *record, my_off_t filepos); +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos); #endif /*HAVE_SPATIAL*/ #endif /* _SP_DEFS_H */ diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c index 79345550dd9..b365f8deb0f 100644 --- a/storage/maria/ma_sp_key.c +++ b/storage/maria/ma_sp_key.c @@ -37,8 +37,8 @@ static void get_double(double *d, const byte *pos) float8get(*d, pos); } -uint sp_make_key(register MARIA_HA *info, uint keynr, byte *key, - const byte *record, my_off_t filepos) +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos) { HA_KEYSEG *keyseg; MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr]; diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res index 7ffd3378b51..57b0feeeae8 100644 --- a/storage/maria/ma_test_all.res +++ b/storage/maria/ma_test_all.res @@ -1,53 +1,62 @@ -maria_chk: MARIA file test1 -maria_chk: warning: Size of indexfile is: 8192 Should be: 16384 -MARIA-table 'test1' is usable but should be fixed +Running tests with dynamic row format +Running tests with static row format +Running tests with block row format ma_test2 -s -L -K -R1 -m2000 ; Should give error 135 -Error: 135 in write at record: 1105 +Error: 135 in write at record: 1099 got error: 135 when using MARIA-database +./maria_chk -sm test2 will warn that 'Datafile is almost full' maria_chk: MARIA file test2 -maria_chk: warning: Datafile is almost full, 65532 of 65534 used +maria_chk: warning: Datafile is almost full, 65516 of 65534 used MARIA-table 'test2' is usable but should be fixed -Commands Used count Errors Recover errors -open 1 0 0 -write 50 0 0 -update 5 0 0 -delete 50 0 0 -close 1 0 0 -extra 6 0 0 -Total 113 0 0 -Commands Used count Errors Recover errors -open 2 0 0 -write 100 0 0 -update 10 0 0 -delete 100 0 0 -close 2 0 0 -extra 12 0 0 -Total 226 0 0 -real 0m0.994s -user 0m0.432s -sys 0m0.184s +real 0m0.808s +user 0m0.584s +sys 0m0.212s -real 0m2.153s -user 0m1.196s -sys 0m0.228s +real 0m0.780s +user 0m0.584s +sys 0m0.176s -real 0m1.483s -user 0m0.772s +real 0m0.809s +user 0m0.616s sys 0m0.180s -real 0m1.992s -user 0m1.180s +real 0m1.356s +user 0m1.140s sys 0m0.188s -real 0m2.028s +real 0m0.783s +user 0m0.600s +sys 0m0.176s + +real 0m1.390s user 0m1.184s sys 0m0.152s -real 0m1.878s -user 0m1.028s -sys 0m0.136s +real 0m1.875s +user 0m1.632s +sys 0m0.244s -real 0m1.980s -user 0m1.116s -sys 0m0.192s +real 0m1.313s +user 0m1.148s +sys 0m0.160s + +real 0m1.846s +user 0m1.644s +sys 0m0.188s + +real 0m1.875s +user 0m1.632s +sys 0m0.212s + +real 0m1.819s +user 0m1.672s +sys 0m0.124s + +real 0m2.117s +user 0m1.816s +sys 0m0.292s + +real 0m1.871s +user 0m1.636s +sys 0m0.196s diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index b79c5558933..5d0133c8ac1 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -26,7 +26,7 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, MARIA_KEYDEF *key= &info->s->keyinfo[def->key]; byte *key_buff= info->lastkey2; DBUG_ENTER("_ma_check_unique"); - DBUG_PRINT("enter",("unique_hash: %lu", unique_hash)); + DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash)); maria_unique_store(record+key->seg->start, unique_hash); _ma_make_key(info,def->key,key_buff,record,0); diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c index d8dffa09a5e..810c6c12ea4 100644 --- a/storage/maria/tablockman.c +++ b/storage/maria/tablockman.c @@ -1,5 +1,5 @@ -#warning TODO - allocate everything from dynarrays !!! (benchmark) -#warning automatically place S instead of LS if possible +/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */ +/* QQ: automatically place S instead of LS if possible */ /* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify @@ -219,7 +219,7 @@ static const enum lockman_getlock_result getlock_result[10][10]= */ struct st_table_lock { -#warning do we need upgraded_from ? + /* QQ: do we need upgraded_from ? */ struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; struct st_locked_table *table; uint16 loid; diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h index 4498e7027b4..2c6fb6996a3 100644 --- a/storage/maria/tablockman.h +++ b/storage/maria/tablockman.h @@ -33,7 +33,7 @@ LSIX - Loose Shared + Intention eXclusive */ #ifndef _lockman_h -#warning TODO remove N-locks +/* QQ: TODO remove N-locks */ enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; enum lockman_getlock_result { NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index f289f6fcc5b..37978e4ff76 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -351,8 +351,8 @@ void trnman_end_trn(TRN *trn, my_bool commit) those lists, and thus nobody may want to free them. Now we don't need a mutex to access free_me list */ + /* QQ: send them to the purge thread */ while (free_me) -#warning XXX send them to the purge thread { TRN *t= free_me; free_me= free_me->next; diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c index 2a8090ab9ac..01af1a03d22 100644 --- a/storage/maria/unittest/lockman2-t.c +++ b/storage/maria/unittest/lockman2-t.c @@ -171,10 +171,12 @@ void run_test(const char *test, pthread_handler handler, int n, int m) static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) { +#ifdef NOT_USED_YET TABLE_LOCK_OWNER backup= *lo; +#endif tablockman_release_locks(lm, lo); - /* +#ifdef NOT_USED_YET pthread_mutex_destroy(lo->mutex); pthread_cond_destroy(lo->cond); bzero(lo, sizeof(*lo)); @@ -183,7 +185,8 @@ static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) lo->cond= backup.cond; lo->loid= backup.loid; pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init(lo->cond, 0);*/ + pthread_cond_init(lo->cond, 0); +#endif } pthread_mutex_t rt_mutex; @@ -191,8 +194,8 @@ int Nrows= 100; int Ntables= 10; int table_lock_ratio= 10; enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; -char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; -char *res2str[]= { +const char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +const char *res2str[]= { 0, "OUT OF MEMORY", "DEADLOCK", @@ -200,6 +203,7 @@ char *res2str[]= { "GOT THE LOCK", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; + pthread_handler_t test_lockman(void *arg) { int m= (*(int *)arg); @@ -215,13 +219,16 @@ pthread_handler_t test_lockman(void *arg) for (x= ((int)(intptr)(&m)); m > 0; m--) { - x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + /* three prime numbers */ + x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767)); row= x % Nrows + Ntables; table= row % Ntables; locklevel= (x/Nrows) & 3; if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) - { /* table lock */ - res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); + { + /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, + lock_array[locklevel]); DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, lock2str[locklevel], res2str[res])); if (res < GOT_THE_LOCK) diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index beb86843dd3..49575378c78 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -42,7 +42,7 @@ char file_name[FN_REFLEN]; LSN expect_checkpoint_lsn; uint32 expect_logno; -static int delete_file(); +static int delete_file(myf my_flags); /* Those are test-specific wrappers around the module's API functions: after calling the module's API functions they perform checks on the result. @@ -91,7 +91,7 @@ int main(int argc,char *argv[]) get_options(argc,argv); diag("Deleting control file at startup, if there is an old one"); - RET_ERR_UNLESS(0 == delete_file()); /* if fails, can't continue */ + RET_ERR_UNLESS(0 == delete_file(0)); /* if fails, can't continue */ diag("Tests of normal conditions"); ok(0 == test_one_log(), "test of creating one log"); @@ -111,7 +111,7 @@ int main(int argc,char *argv[]) } -static int delete_file() +static int delete_file(myf my_flags) { RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME, maria_data_root, "", MYF(MY_WME)) != NullS); @@ -119,7 +119,7 @@ static int delete_file() Maybe file does not exist, ignore error. The error will however be printed on stderr. */ - my_delete(file_name, MYF(MY_WME)); + my_delete(file_name, my_flags); expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; @@ -365,7 +365,7 @@ static int test_bad_size() int fd; /* A too short file */ - RET_ERR_UNLESS(delete_file() == 0); + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); RET_ERR_UNLESS((fd= my_open(file_name, O_BINARY | O_RDWR | O_CREAT, MYF(MY_WME))) >= 0); @@ -378,7 +378,7 @@ static int test_bad_size() RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); /* Leave a correct control file */ - RET_ERR_UNLESS(delete_file() == 0); + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); RET_ERR_UNLESS(close_file() == 0); diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c index 3c70d10c440..7d97794b685 100644 --- a/storage/maria/unittest/trnman-t.c +++ b/storage/maria/unittest/trnman-t.c @@ -35,7 +35,7 @@ int litmus; pthread_handler_t test_trnman(void *arg) { int m= (*(int *)arg); - uint x, y, i, j, n; + uint x, y, i, n; TRN *trn[MAX_ITER]; pthread_mutex_t mutexes[MAX_ITER]; pthread_cond_t conds[MAX_ITER]; @@ -48,7 +48,7 @@ pthread_handler_t test_trnman(void *arg) for (x= ((int)(intptr)(&m)); m > 0; ) { - y= x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + y= x= (x*LL(3628273133) + LL(1500450271)) % LL(9576890767); /* three prime numbers */ m-= n= x % MAX_ITER; for (i= 0; i < n; i++) { @@ -65,7 +65,6 @@ pthread_handler_t test_trnman(void *arg) trnman_end_trn(trn[i], y & 1); } } -end: for (i= 0; i < MAX_ITER; i++) { pthread_mutex_destroy(&mutexes[i]); diff --git a/storage/ndb/src/mgmapi/mgmapi.cpp b/storage/ndb/src/mgmapi/mgmapi.cpp index 614d0fea8e2..60201eac431 100644 --- a/storage/ndb/src/mgmapi/mgmapi.cpp +++ b/storage/ndb/src/mgmapi/mgmapi.cpp @@ -1274,13 +1274,13 @@ ndb_mgm_get_clusterlog_severity_filter(NdbMgmHandle handle, MGM_ARG(clusterlog_severity_names[5], Int, Mandatory, ""), MGM_ARG(clusterlog_severity_names[6], Int, Mandatory, ""), }; - CHECK_HANDLE(handle, NULL); - CHECK_CONNECTED(handle, NULL); + CHECK_HANDLE(handle, 0); + CHECK_CONNECTED(handle, 0); Properties args; const Properties *reply; reply = ndb_mgm_call(handle, getinfo_reply, "get info clusterlog", &args); - CHECK_REPLY(reply, NULL); + CHECK_REPLY(reply, 0); for(unsigned int i=0; i < severity_size; i++) { reply->get(clusterlog_severity_names[severity[i].category], &severity[i].value); @@ -1431,13 +1431,13 @@ ndb_mgm_get_clusterlog_loglevel(NdbMgmHandle handle, MGM_ARG(clusterlog_names[10], Int, Mandatory, ""), MGM_ARG(clusterlog_names[11], Int, Mandatory, ""), }; - CHECK_HANDLE(handle, NULL); - CHECK_CONNECTED(handle, NULL); + CHECK_HANDLE(handle, 0); + CHECK_CONNECTED(handle, 0); Properties args; const Properties *reply; reply = ndb_mgm_call(handle, getloglevel_reply, "get cluster loglevel", &args); - CHECK_REPLY(reply, NULL); + CHECK_REPLY(reply, 0); for(int i=0; i < loglevel_count; i++) { reply->get(clusterlog_names[loglevel[i].category], &loglevel[i].value); diff --git a/unittest/mysys/mf_pagecache_consist.c b/unittest/mysys/mf_pagecache_consist.c index d31c19cc746..fd824e27051 100755 --- a/unittest/mysys/mf_pagecache_consist.c +++ b/unittest/mysys/mf_pagecache_consist.c @@ -101,7 +101,7 @@ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, (page_locked ? "locked" : "unlocked"), end, num, tag); h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); - my_pwrite(h, buff, PAGE_SIZE, 0, MYF(0)); + my_pwrite(h, (byte*) buff, PAGE_SIZE, 0, MYF(0)); my_close(h, MYF(0)); goto err; } diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 02e07acd67b..c16a2f118e5 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -252,11 +252,11 @@ void test_atomic(const char *test, pthread_handler handler, int n, int m) for (i= 0 ; i < n ; i++) pthread_join(threads[i], 0); now= my_getsystime()-now; -err: ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32); my_free((void *)threads, MYF(0)); } + int main() { int err; From 025400922118f11a15be54c66455f20e2f72c0b4 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 2 Feb 2007 09:41:32 +0200 Subject: [PATCH 87/95] postreview changes for page cache and pre review commit for loghandler storage/maria/unittest/test_file.c: Rename: unittest/mysys/test_file.c -> storage/maria/unittest/test_file.c storage/maria/unittest/test_file.h: Rename: unittest/mysys/test_file.h -> storage/maria/unittest/test_file.h include/pagecache.h: A waiting queue mechanism moved to separate file wqueue.* Pointer name changed for compatibility mysys/Makefile.am: A waiting queue mechanism moved to separate file wqueue.* mysys/mf_keycache.c: fixed unsigned comparison mysys/mf_pagecache.c: A waiting queue mechanism moved to separate file wqueue.* Fixed bug in unregistering block during write storage/maria/Makefile.am: The loghandler files added storage/maria/ma_control_file.h: Now we have loghandler and can compile control file storage/maria/maria_def.h: Including files need for compilation of maria storage/maria/unittest/Makefile.am: unit tests of loghandler storage/maria/unittest/ma_control_file-t.c: Used maria def storage/maria/unittest/mf_pagecache_consist.c: fixed memory overrun storage/maria/unittest/mf_pagecache_single.c: fixed used uninitialized memory unittest/mysys/Makefile.am: unittests of pagecache moved to maria becase pagecache need loghandler include/wqueue.h: New BitKeeper file ``include/wqueue.h'' mysys/wqueue.c: New BitKeeper file ``mysys/wqueue.c'' storage/maria/ma_loghandler.c: New BitKeeper file ``storage/maria/ma_loghandler.c'' storage/maria/ma_loghandler.h: New BitKeeper file ``storage/maria/ma_loghandler.h'' storage/maria/ma_loghandler_lsn.h: New BitKeeper file ``storage/maria/ma_loghandler_lsn.h'' storage/maria/unittest/ma_test_loghandler-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler-t.c'' storage/maria/unittest/ma_test_loghandler_multigroup-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multigroup-t.c'' storage/maria/unittest/ma_test_loghandler_multithread-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multithread-t.c'' storage/maria/unittest/ma_test_loghandler_pagecache-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_pagecache-t.c'' --- include/pagecache.h | 65 +- include/wqueue.h | 26 + mysys/Makefile.am | 2 +- mysys/mf_keycache.c | 2 +- mysys/mf_pagecache.c | 776 ++- mysys/wqueue.c | 167 + storage/maria/Makefile.am | 2 +- storage/maria/ma_control_file.h | 22 +- storage/maria/ma_loghandler.c | 5417 +++++++++++++++++ storage/maria/ma_loghandler.h | 314 + storage/maria/ma_loghandler_lsn.h | 39 + storage/maria/maria_def.h | 5 + storage/maria/unittest/Makefile.am | 60 +- storage/maria/unittest/ma_control_file-t.c | 2 +- storage/maria/unittest/ma_test_loghandler-t.c | 540 ++ .../ma_test_loghandler_multigroup-t.c | 570 ++ .../ma_test_loghandler_multithread-t.c | 468 ++ .../unittest/ma_test_loghandler_pagecache-t.c | 140 + .../maria/unittest}/mf_pagecache_consist.c | 33 +- .../maria/unittest}/mf_pagecache_single.c | 6 +- .../maria/unittest}/test_file.c | 0 .../maria/unittest}/test_file.h | 0 unittest/mysys/Makefile.am | 43 +- 23 files changed, 8160 insertions(+), 539 deletions(-) create mode 100644 include/wqueue.h create mode 100644 mysys/wqueue.c create mode 100644 storage/maria/ma_loghandler.c create mode 100644 storage/maria/ma_loghandler.h create mode 100644 storage/maria/ma_loghandler_lsn.h create mode 100644 storage/maria/unittest/ma_test_loghandler-t.c create mode 100644 storage/maria/unittest/ma_test_loghandler_multigroup-t.c create mode 100644 storage/maria/unittest/ma_test_loghandler_multithread-t.c create mode 100644 storage/maria/unittest/ma_test_loghandler_pagecache-t.c rename {unittest/mysys => storage/maria/unittest}/mf_pagecache_consist.c (95%) rename {unittest/mysys => storage/maria/unittest}/mf_pagecache_single.c (99%) rename {unittest/mysys => storage/maria/unittest}/test_file.c (100%) rename {unittest/mysys => storage/maria/unittest}/test_file.h (100%) diff --git a/include/pagecache.h b/include/pagecache.h index 4d64070ad62..2f745eae0b3 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -20,11 +20,13 @@ #define _pagecache_h C_MODE_START +#include "../storage/maria/ma_loghandler_lsn.h" + /* Type of the page */ enum pagecache_page_type { #ifndef DBUG_OFF - /* used only for control page type chenging during debugging */ + /* used only for control page type changing during debugging */ PAGECACHE_EMPTY_PAGE, #endif /* the page does not contain LSN */ @@ -34,7 +36,7 @@ enum pagecache_page_type }; /* - This enum describe lock status changing. every typr of page cache will + This enum describe lock status changing. every type of page cache will interpret WRITE/READ lock as it need. */ enum pagecache_page_lock @@ -71,9 +73,7 @@ enum pagecache_write_mode typedef void *PAGECACHE_PAGE_LINK; -/* TODO: move to loghandler emulator */ -typedef void LOG_HANDLER; -typedef void *LSN; +typedef void *LSN_PTR; /* file descriptor for Maria */ typedef struct st_pagecache_file @@ -82,7 +82,7 @@ typedef struct st_pagecache_file } PAGECACHE_FILE; /* page number for maria */ -typedef uint32 maria_page_no_t; +typedef uint32 pgcache_page_no_t; /* declare structures that is used by st_pagecache */ @@ -93,11 +93,9 @@ typedef struct st_pagecache_page PAGECACHE_PAGE; struct st_pagecache_hash_link; typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; -/* info about requests in a waiting queue */ -typedef struct st_pagecache_wqueue -{ - struct st_my_thread_var *last_thread; /* circular list of waiting threads */ -} PAGECACHE_WQUEUE; +#include + +typedef my_bool (*pagecache_disk_read_validator)(byte *page, gptr data); #define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */ @@ -136,16 +134,14 @@ typedef struct st_pagecache PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */ PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */ pthread_mutex_t cache_lock; /* to lock access to the cache structure */ - PAGECACHE_WQUEUE resize_queue; /* threads waiting during resize operation */ - PAGECACHE_WQUEUE waiting_for_hash_link;/* waiting for a free hash link */ - PAGECACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */ + WQUEUE resize_queue; /* threads waiting during resize operation */ + WQUEUE waiting_for_hash_link;/* waiting for a free hash link */ + WQUEUE waiting_for_block; /* requests waiting for a free block */ /* hash for dirty file bl.*/ PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; /* hash for other file bl.*/ PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; - LOG_HANDLER *loghandler; /* loghandler structure */ - /* The following variables are and variables used to hold parameters for initializing the key cache. @@ -169,24 +165,29 @@ typedef struct st_pagecache extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold, - uint block_size, - LOG_HANDLER *loghandler); + uint block_size); extern int resize_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold); extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, uint age_threshold); -extern byte *pagecache_read(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - uint level, - byte *buff, - enum pagecache_page_type type, - enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link); + +#define pagecache_read(P,F,N,L,B,T,K,I) \ + pagecache_valid_read(P,F,N,L,B,T,K,I,0,0) + +extern byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data); extern my_bool pagecache_write(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, uint level, byte *buff, enum pagecache_page_type type, @@ -196,20 +197,20 @@ extern my_bool pagecache_write(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link); extern void pagecache_unlock_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page); + LSN_PTR first_REDO_LSN_for_page); extern void pagecache_unlock(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page); + LSN_PTR first_REDO_LSN_for_page); extern void pagecache_unpin_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno); + pgcache_page_no_t pageno); extern void pagecache_unpin(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link); extern int flush_pagecache_blocks(PAGECACHE *keycache, @@ -217,7 +218,7 @@ extern int flush_pagecache_blocks(PAGECACHE *keycache, enum flush_type type); extern my_bool pagecache_delete_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, my_bool flush); extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); diff --git a/include/wqueue.h b/include/wqueue.h new file mode 100644 index 00000000000..bacabb8c401 --- /dev/null +++ b/include/wqueue.h @@ -0,0 +1,26 @@ + +#ifndef _wqueue_h +#define _wqueue_h + +#include +#include + +/* info about requests in a waiting queue */ +typedef struct st_pagecache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting + threads */ +} WQUEUE; + +#ifdef THREAD +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, + pthread_mutex_t *lock); +void wqueue_release_queue(WQUEUE *wqueue); + +#endif + +#endif diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 4d9570febbd..612411404c4 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -56,7 +56,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ my_windac.c my_access.c base64.c my_libwrap.c \ - mf_pagecache.c + mf_pagecache.c wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 9a99a278bc5..9cb428ab200 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1008,12 +1008,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); keycache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block %u status=%x #requests=%u #available=%u", BLOCK_NUMBER(block), block->status, block->requests, keycache->blocks_available)); - KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0); #endif } diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4b92f68d9bf..97cb542f329 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -26,7 +26,7 @@ When a new block is required it is first tried to pop one from the stack. If the stack is empty, it is tried to get a never-used block from the pool. If this is empty too, then a block is taken from the LRU ring, flushing it - to disk, if necessary. This is handled in find_key_block(). + to disk, if necessary. This is handled in find_block(). With the new free list, the blocks can have three temperatures: hot, warm and cold (which is free). This is remembered in the block header by the enum BLOCK_TEMPERATURE temperature variable. Remembering the @@ -91,13 +91,16 @@ /* In key cache we have external raw locking here we use SERIALIZED_READ_FROM_CACHE to avoid problem of reading - not consistent data from te page + not consistent data from the page. + (keycache functions (key_cache_read(), key_cache_insert() and + key_cache_write()) rely on external MyISAM lock, we don't) */ #define SERIALIZED_READ_FROM_CACHE yes #define BLOCK_INFO(B) \ DBUG_PRINT("info", \ - ("block 0x%lx, file %lu, page %lu, s %0x, hshL 0x%lx, req %u/%u", \ + ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \ + "wrlock: %c", \ (ulong)(B), \ (ulong)((B)->hash_link ? \ (B)->hash_link->file.file : \ @@ -110,7 +113,8 @@ (uint) (B)->requests, \ (uint)((B)->hash_link ? \ (B)->hash_link->requests : \ - 0))) + 0), \ + ((block->status & BLOCK_WRLOCK)?'Y':'N'))) /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; @@ -138,7 +142,7 @@ typedef pthread_cond_t KEYCACHE_CONDVAR; struct st_pagecache_page { PAGECACHE_FILE file; /* file to which the page belongs to */ - maria_page_no_t pageno; /* number of the page in the file */ + pgcache_page_no_t pageno; /* number of the page in the file */ }; /* element in the chain of a hash table bucket */ @@ -149,7 +153,7 @@ struct st_pagecache_hash_link struct st_pagecache_block_link *block; /* reference to the block for the page: */ PAGECACHE_FILE file; /* from such a file */ - maria_page_no_t pageno; /* this page */ + pgcache_page_no_t pageno; /* this page */ uint requests; /* number of requests for the page */ }; @@ -162,7 +166,7 @@ struct st_pagecache_hash_link #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define BLOCK_WRLOCK 64 /* write locked block */ -/* page status, returned by find_key_block */ +/* page status, returned by find_block */ #define PAGE_READ 0 #define PAGE_TO_BE_READ 1 #define PAGE_WAIT_TO_BE_READ 2 @@ -232,7 +236,7 @@ typedef struct st_pagecache_lock_info node the node which should be linked */ -void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) { if ((node->next= *list)) node->next->prev= &(node->next); @@ -249,7 +253,7 @@ void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) node the node which should be unlinked */ -void info_unlink(PAGECACHE_PIN_INFO *node) +static void info_unlink(PAGECACHE_PIN_INFO *node) { if ((*node->prev= node->next)) node->next->prev= node->prev; @@ -271,8 +275,8 @@ void info_unlink(PAGECACHE_PIN_INFO *node) pointer to the information node of the thread in the list */ -PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, - struct st_my_thread_var *thread) +static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) { register PAGECACHE_PIN_INFO *i= list; for(; i != 0; i= i->next) @@ -291,7 +295,7 @@ struct st_pagecache_block_link *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ struct st_pagecache_hash_link *hash_link; /* backward ptr to referring hash_link */ - PAGECACHE_WQUEUE + WQUEUE wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ uint requests; /* number of requests for the block */ byte *buffer; /* buffer for the block page */ @@ -310,8 +314,8 @@ struct st_pagecache_block_link #ifdef PAGECACHE_DEBUG /* debug checks */ -my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) +static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_pin"); @@ -367,9 +371,9 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, 1 - Error */ -my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) +static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_lock"); @@ -379,47 +383,47 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, switch(lock) { case PAGECACHE_LOCK_LEFT_UNLOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); - if (info) + if (pin != PAGECACHE_PIN_LEFT_UNPINNED || + info) goto error; break; case PAGECACHE_LOCK_LEFT_READLOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN_LEFT_PINNED); - if (info == 0 || info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN_LEFT_PINNED) || + info == 0 || info->write_lock) goto error; break; case PAGECACHE_LOCK_LEFT_WRITELOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); - if (info == 0 || !info->write_lock) + if (pin != PAGECACHE_PIN_LEFT_PINNED || + info == 0 || !info->write_lock) goto error; break; case PAGECACHE_LOCK_READ: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN); - if (info != 0) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN) || + info != 0) goto error; break; case PAGECACHE_LOCK_WRITE: - DBUG_ASSERT(pin == PAGECACHE_PIN); - if (info != 0) + if (pin != PAGECACHE_PIN || + info != 0) goto error; break; case PAGECACHE_LOCK_READ_UNLOCK: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_UNPIN); - if (info == 0 || info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || info->write_lock) goto error; break; case PAGECACHE_LOCK_WRITE_UNLOCK: - DBUG_ASSERT(pin == PAGECACHE_UNPIN); - if (info == 0 || !info->write_lock) + if (pin != PAGECACHE_UNPIN || + info == 0 || !info->write_lock) goto error; break; case PAGECACHE_LOCK_WRITE_TO_READ: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED || - pin == PAGECACHE_UNPIN); - if (info == 0 || !info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_PINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || !info->write_lock) goto error; break; } @@ -439,12 +443,6 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ static int flush_all_key_blocks(PAGECACHE *pagecache); -#ifdef THREAD -static void link_into_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread); -static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread); -#endif static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); static void test_key_cache(PAGECACHE *pagecache, const char *where, my_bool lock); @@ -551,6 +549,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); #define pagecache_pthread_cond_signal pthread_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ +extern my_bool translog_flush(LSN *lsn); /* Write page to the disk @@ -567,18 +566,28 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); 0 - OK !=0 - Error */ -uint pagecache_fwrite(PAGECACHE *pagecache, - PAGECACHE_FILE *filedesc, - byte *buffer, - maria_page_no_t pageno, - enum pagecache_page_type type, - myf flags) + +static uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + pgcache_page_no_t pageno, + enum pagecache_page_type type, + myf flags) { DBUG_ENTER("pagecache_fwrite"); if (type == PAGECACHE_LSN_PAGE) { + LSN lsn; DBUG_PRINT("info", ("Log handler call")); - /* TODO: put here loghandler call */ + /* TODO: integrate with page format */ +#define PAGE_LSN_OFFSET 0 + lsn7korr(&lsn, buffer + PAGE_LSN_OFFSET); + /* + check CONTROL_FILE_IMPOSSIBLE_FILENO & + CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET + */ + DBUG_ASSERT(lsn.file_no != 0 && lsn.rec_offset != 0); + translog_flush(&lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, (pageno)<<(pagecache->shift), flags)); @@ -628,8 +637,6 @@ static uint next_power(uint value) division_limit division limit (may be zero) age_threshold age threshold (may be zero) block_size size of block (should be power of 2) - loghandler logfandler pointer to call it in case of - pages with LSN RETURN VALUE number of blocks in the key cache, if successful, @@ -647,12 +654,11 @@ static uint next_power(uint value) int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold, - uint block_size, - LOG_HANDLER *loghandler) + uint block_size) { - int blocks, hash_links, length; + uint blocks, hash_links, length; int error; - DBUG_ENTER("init_key_cache"); + DBUG_ENTER("init_pagecache"); DBUG_ASSERT(block_size >= 512); PAGECACHE_DEBUG_OPEN; @@ -662,8 +668,6 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, DBUG_RETURN(0); } - pagecache->loghandler= loghandler; - pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; pagecache->global_cache_read= pagecache->global_cache_write= 0; pagecache->disk_blocks= -1; @@ -692,8 +696,8 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, for ( ; ; ) { /* Set my_hash_entries to the next bigger 2 power */ - if ((pagecache->hash_entries= next_power((uint)blocks)) < - ((uint)blocks) * 5/4) + if ((pagecache->hash_entries= next_power(blocks)) < + (blocks) * 5/4) pagecache->hash_entries<<= 1; hash_links= 2 * blocks; #if defined(MAX_THREADS) @@ -704,7 +708,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * pagecache->hash_entries))) + - ((ulong) blocks << pagecache->shift) > use_mem) + (((ulong) blocks) << pagecache->shift) > use_mem) blocks--; /* Allocate memory for cache page buffers */ if ((pagecache->block_mem= @@ -760,10 +764,10 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->warm_blocks= 0; pagecache->min_warm_blocks= (division_limit ? blocks * division_limit / 100 + 1 : - (ulong)blocks); + blocks); pagecache->age_threshold= (age_threshold ? blocks * age_threshold / 100 : - (ulong)blocks); + blocks); pagecache->cnt_for_resize_op= 0; pagecache->resize_in_flush= 0; @@ -842,7 +846,8 @@ int resize_pagecache(PAGECACHE *pagecache, { int blocks; struct st_my_thread_var *thread; - PAGECACHE_WQUEUE *wqueue; + WQUEUE *wqueue; + DBUG_ENTER("resize_pagecache"); if (!pagecache->inited) @@ -859,7 +864,7 @@ int resize_pagecache(PAGECACHE *pagecache, #ifdef THREAD wqueue= &pagecache->resize_queue; thread= my_thread_var; - link_into_queue(wqueue, thread); + wqueue_link_into_queue(wqueue, thread); while (wqueue->last_thread->next != thread) { @@ -892,12 +897,11 @@ int resize_pagecache(PAGECACHE *pagecache, end_pagecache(pagecache, 0); /* Don't free mutex */ /* The following will work even if use_mem is 0 */ blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, - division_limit, age_threshold, - pagecache->loghandler); + division_limit, age_threshold); finish: #ifdef THREAD - unlink_from_queue(wqueue, thread); + wqueue_unlink_from_queue(wqueue, thread); /* Signal for the next resize request to proceeed if any */ if (wqueue->last_thread) { @@ -1027,146 +1031,6 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) } /* end_pagecache */ -#ifdef THREAD -/* - Link a thread into double-linked queue of waiting threads. - - SYNOPSIS - link_into_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. - Queue is represented by a circular list of the thread structures - The list is double-linked of the type (**prev,*next), accessed by - a pointer to the last element. -*/ - -static void link_into_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - struct st_my_thread_var *last; - if (! (last= wqueue->last_thread)) - { - /* Queue is empty */ - thread->next= thread; - thread->prev= &thread->next; - } - else - { - thread->prev= last->next->prev; - last->next->prev= &thread->next; - thread->next= last->next; - last->next= thread; - } - wqueue->last_thread= thread; -} - -/* - Unlink a thread from double-linked queue of waiting threads - - SYNOPSIS - unlink_from_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be removed from the queue - - RETURN VALUE - none - - NOTES. - See NOTES for link_into_queue -*/ - -static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id)); - if (thread->next == thread) - /* The queue contains only one member */ - wqueue->last_thread= NULL; - else - { - thread->next->prev= thread->prev; - *thread->prev=thread->next; - if (wqueue->last_thread == thread) - wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, - thread->prev); - } - thread->next= NULL; -} - - -/* - Add a thread to single-linked queue of waiting threads - - SYNOPSIS - add_to_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. - Queue is represented by a circular list of the thread structures - The list is single-linked of the type (*next), accessed by a pointer - to the last element. -*/ - -static inline void add_to_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - struct st_my_thread_var *last; - if (! (last= wqueue->last_thread)) - thread->next= thread; - else - { - thread->next= last->next; - last->next= thread; - } - wqueue->last_thread= thread; -} - - -/* - Remove all threads from queue signaling them to proceed - - SYNOPSIS - realease_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. - See notes for add_to_queue - When removed from the queue each thread is signaled via condition - variable thread->suspend. -*/ - -static void release_queue(PAGECACHE_WQUEUE *wqueue) -{ - struct st_my_thread_var *last= wqueue->last_thread; - struct st_my_thread_var *next= last->next; - struct st_my_thread_var *thread; - do - { - thread=next; - KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id)); - pagecache_pthread_cond_signal(&thread->suspend); - next=thread->next; - thread->next= NULL; - } - while (thread != last); - wqueue->last_thread= NULL; -} -#endif - - /* Unlink a block from the chain of dirty/clean blocks */ @@ -1273,6 +1137,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, PAGECACHE_BLOCK_LINK *ins; PAGECACHE_BLOCK_LINK **ptr_ins; + BLOCK_INFO(block); KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); #ifdef THREAD if (!hot && pagecache->waiting_for_block.last_thread) @@ -1297,7 +1162,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, { KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); pagecache_pthread_cond_signal(&thread->suspend); - unlink_from_queue(&pagecache->waiting_for_block, thread); + wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); block->requests++; } } @@ -1363,6 +1228,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) { + DBUG_ENTER("unlink_block"); + DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); if (block->next_used == block) /* The list contains only one member */ pagecache->used_last= pagecache->used_ins= NULL; @@ -1381,14 +1248,15 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); pagecache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u", (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests, pagecache->blocks_available)); BLOCK_INFO(block); - KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0); #endif + DBUG_VOID_RETURN; } @@ -1591,7 +1459,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) { KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); pagecache_pthread_cond_signal(&thread->suspend); - unlink_from_queue(&pagecache->waiting_for_hash_link, thread); + wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); } } while (thread != last_thread); @@ -1618,7 +1486,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) pagecache Pagecache reference file file ID pageno page number in the file - start where to put pointer to found hash link (for + start where to put pointer to found hash bucket (for direct referring it) RETURN @@ -1627,7 +1495,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, PAGECACHE_HASH_LINK ***start) { reg1 PAGECACHE_HASH_LINK *hash_link; @@ -1670,6 +1538,12 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); #endif } + if (hash_link) + { + /* Register the request for the page */ + hash_link->requests++; + } + DBUG_RETURN(hash_link); } @@ -1680,7 +1554,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno) + pgcache_page_no_t pageno) { reg1 PAGECACHE_HASH_LINK *hash_link; PAGECACHE_HASH_LINK **start; @@ -1693,7 +1567,7 @@ restart: /* try to find the page in the cache */ hash_link= get_present_hash_link(pagecache, file, pageno, &start); - if (! hash_link) + if (!hash_link) { /* There is no hash link in the hash table for the pair (file, pageno) */ if (pagecache->free_hash_list) @@ -1714,7 +1588,7 @@ restart: page.file= *file; page.pageno= pageno; thread->opt_info= (void *) &page; - link_into_queue(&pagecache->waiting_for_hash_link, thread); + wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); KEYCACHE_DBUG_PRINT("get_hash_link: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, @@ -1723,14 +1597,15 @@ restart: #else KEYCACHE_DBUG_ASSERT(0); #endif + DBUG_PRINT("info", ("restarting...")); goto restart; } hash_link->file= *file; hash_link->pageno= pageno; link_hash(start, hash_link); + /* Register the request for the page */ + hash_link->requests++; } - /* Register the request for the page */ - hash_link->requests++; return hash_link; } @@ -1743,7 +1618,7 @@ restart: SYNOPSIS - find_key_block() + find_block() pagecache pointer to a page cache data structure file handler for the file to read page from pageno number of the page in the file @@ -1773,29 +1648,29 @@ restart: waits until first of this operations links any block back. */ -static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - int init_hits_left, - my_bool wrmode, - my_bool reg_req, - int *page_st) +static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) { PAGECACHE_HASH_LINK *hash_link; PAGECACHE_BLOCK_LINK *block; int error= 0; int page_status; - DBUG_ENTER("find_key_block"); - KEYCACHE_THREAD_TRACE("find_key_block:begin"); + DBUG_ENTER("find_block"); + KEYCACHE_THREAD_TRACE("find_block:begin"); DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", (uint) file->file, (ulong) pageno, (uint) wrmode)); - KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, - (uint) wrmode)); + KEYCACHE_DBUG_PRINT("find_block", ("fd: %u pos: %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, + (uint) wrmode)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "start of find_key_block", 0);); + test_key_cache(pagecache, "start of find_block", 0);); #endif restart: @@ -1840,10 +1715,10 @@ restart: { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -1871,7 +1746,7 @@ restart: { /* This is a request for a page to be removed from cache */ - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("request for old page in block %u " "wrmode: %d block->status: %d", BLOCK_NUMBER(pagecache, block), wrmode, @@ -1888,17 +1763,17 @@ restart: else { hash_link->requests--; - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("request waiting for old page to be saved")); { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; /* Put the request into the queue of those waiting for the old page */ - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); /* Wait until the request can be resubmitted */ do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -1909,11 +1784,13 @@ restart: /* No parallel requests in single-threaded case */ #endif } - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("request for old page resubmitted")); + DBUG_PRINT("info", ("restarting...")); /* Resubmit the request */ goto restart; } + block->status&= ~BLOCK_IN_SWITCH; } else { @@ -1941,7 +1818,8 @@ restart: pagecache->blocks_used++; } pagecache->blocks_unused--; - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT((block->status & BLOCK_WRLOCK)); + DBUG_ASSERT(block->pins > 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -1954,7 +1832,9 @@ restart: block->hash_link= hash_link; hash_link->block= block; page_status= PAGE_TO_BE_READ; - KEYCACHE_DBUG_PRINT("find_key_block", + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + KEYCACHE_DBUG_PRINT("find_block", ("got free or never used block %u", BLOCK_NUMBER(pagecache, block))); } @@ -1973,10 +1853,10 @@ restart: { struct st_my_thread_var *thread= my_thread_var; thread->opt_info= (void *) hash_link; - link_into_queue(&pagecache->waiting_for_block, thread); + wqueue_link_into_queue(&pagecache->waiting_for_block, thread); do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -2001,19 +1881,18 @@ restart: reg_requests(pagecache, block,1); hash_link->block= block; } - else - { - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - } + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); if (block->hash_link != hash_link && ! (block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("got block %u for new page", BLOCK_NUMBER(pagecache, block))); @@ -2021,7 +1900,7 @@ restart: { /* The block contains a dirty page - push it out of the cache */ - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* @@ -2054,7 +1933,7 @@ restart: unlink_hash(pagecache, block->hash_link); /* All pending requests for this page must be resubmitted */ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); @@ -2065,6 +1944,8 @@ restart: #endif block->hash_link= hash_link; page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); @@ -2072,7 +1953,7 @@ restart: else { /* This is for secondary requests for a new page only */ - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block->hash_link: %p hash_link: %p " "block->status: %u", block->hash_link, hash_link, block->status )); @@ -2087,7 +1968,7 @@ restart: { if (reg_req) reg_requests(pagecache, block, 1); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block->hash_link: %p hash_link: %p " "block->status: %u", block->hash_link, hash_link, block->status )); @@ -2098,12 +1979,12 @@ restart: } KEYCACHE_DBUG_ASSERT(page_status != -1); - *page_st=page_status; + *page_st= page_status; DBUG_PRINT("info", ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", (ulong) block, (uint) file->file, (ulong) pageno, block->status, (uint) page_status)); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", (ulong) block, (uint) file->file, (ulong) pageno, block->status, @@ -2111,16 +1992,16 @@ restart: #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "end of find_key_block",0);); + test_key_cache(pagecache, "end of find_block",0);); #endif - KEYCACHE_THREAD_TRACE("find_key_block:end"); + KEYCACHE_THREAD_TRACE("find_block:end"); DBUG_RETURN(block); } -void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block) +static void add_pin(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_add_pin"); + DBUG_ENTER("add_pin"); DBUG_PRINT("enter", ("block 0x%lx pins: %u", (ulong) block, block->pins)); @@ -2137,9 +2018,9 @@ void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block) DBUG_VOID_RETURN; } -void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) +static void remove_pin(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_remove_pin"); + DBUG_ENTER("remove_pin"); DBUG_PRINT("enter", ("block 0x%lx pins: %u", (ulong) block, block->pins)); @@ -2157,7 +2038,7 @@ void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) DBUG_VOID_RETURN; } #ifdef PAGECACHE_DEBUG -void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); @@ -2166,7 +2047,7 @@ void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) info_link((PAGECACHE_PIN_INFO **)&block->lock_list, (PAGECACHE_PIN_INFO *)info); } -void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) +static void info_remove_lock(PAGECACHE_BLOCK_LINK *block) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2175,7 +2056,7 @@ void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) info_unlink((PAGECACHE_PIN_INFO *)info); my_free((gptr)info, MYF(0)); } -void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2184,40 +2065,47 @@ void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) info->write_lock= wl; } #else -#define pagecache_add_lock(B,W) -#define pagecache_remove_lock(B) -#define pagecache_change_lock(B,W) +#define info_add_lock(B,W) +#define info_remove_lock(B) +#define info_change_lock(B,W) #endif /* - Put on the block "update" type lock + Put on the block write lock SYNOPSIS - pagecache_lock_block() + get_wrlock() pagecache pointer to a page cache data structure block the block to work with RETURN 0 - OK - 1 - Try to lock the block failed + 1 - Can't lock this block, need retry */ -my_bool pagecache_lock_block(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block) +static my_bool get_wrlock(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_lock_block"); + PAGECACHE_FILE file= block->hash_link->file; + pgcache_page_no_t pageno= block->hash_link->pageno; + DBUG_ENTER("get_wrlock"); + DBUG_PRINT("info", ("the block 0x%lx " + "files %d(%d) pages %d(%d)", + (ulong)block, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); BLOCK_INFO(block); while (block->status & BLOCK_WRLOCK) { - DBUG_PRINT("info", ("fail to lock, waiting...")); + DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block)); /* Lock failed we will wait */ #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); dec_counter_for_resize_op(pagecache); do { - KEYCACHE_DBUG_PRINT("pagecache_lock_block: wait", + KEYCACHE_DBUG_PRINT("get_wrlock: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -2227,35 +2115,61 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, DBUG_ASSERT(0); #endif BLOCK_INFO(block); - DBUG_RETURN(1); + if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) || + file.file != block->hash_link->file.file || + pageno != block->hash_link->pageno) + { + DBUG_PRINT("info", ("the block 0x%lx changed => need retry" + "status %x files %d != %d or pages %d !=%d", + (ulong)block, block->status, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + DBUG_RETURN(1); + } } - /* we are doing it by global cache mutex protectio, so it is OK */ + DBUG_ASSERT(block->pins == 0); + /* we are doing it by global cache mutex protection, so it is OK */ block->status|= BLOCK_WRLOCK; DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); DBUG_RETURN(0); } -void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) + +/* + Remove write lock from the block + + SYNOPSIS + release_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK +*/ + +static void release_wrlock(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_unlock_block"); + DBUG_ENTER("release_wrlock"); BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); + DBUG_ASSERT(block->pins > 0); block->status&= ~BLOCK_WRLOCK; DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); #ifdef THREAD /* release all threads waiting for write lock */ if (block->wqueue[COND_FOR_WRLOCK].last_thread) - release_queue(&block->wqueue[COND_FOR_WRLOCK]); + wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]); #endif BLOCK_INFO(block); DBUG_VOID_RETURN; } + /* - Try to lock/uplock and pin/unpin the block + Try to lock/unlock and pin/unpin the block SYNOPSIS - pagecache_make_lock_and_pin() + make_lock_and_pin() pagecache pointer to a page cache data structure block the block to work with lock lock change mode @@ -2266,12 +2180,12 @@ void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) 1 - Try to lock the block failed */ -my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) +static my_bool make_lock_and_pin(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { - DBUG_ENTER("pagecache_make_lock_and_pin"); + DBUG_ENTER("make_lock_and_pin"); DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s", (ulong)block, BLOCK_NUMBER(pagecache, block), ((block->status & BLOCK_WRLOCK)?'Y':'N'), @@ -2287,53 +2201,47 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, { case PAGECACHE_LOCK_WRITE: /* free -> write */ /* Writelock and pin the buffer */ - if (pagecache_lock_block(pagecache, block)) + if (get_wrlock(pagecache, block)) { - DBUG_PRINT("info", ("restart")); - /* in case of fail pagecache_lock_block unlock cache */ - DBUG_RETURN(1); + /* can't lock => need retry */ + goto retry; } - /* The cache is locked so nothing afraid off */ - pagecache_add_pin(block); - pagecache_add_lock(block, 1); + + /* The cache is locked so nothing afraid of */ + add_pin(block); + info_add_lock(block, 1); break; case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ /* - Removes writelog and puts read lock (which is nothing in our + Removes write lock and puts read lock (which is nothing in our implementation) */ - pagecache_unlock_block(block); + release_wrlock(block); case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ -#ifndef DBUG_OFF if (pin == PAGECACHE_UNPIN) { - pagecache_remove_pin(block); + remove_pin(block); } -#endif -#ifdef PAGECACHE_DEBUG if (lock == PAGECACHE_LOCK_WRITE_TO_READ) { - pagecache_change_lock(block, 0); + info_change_lock(block, 0); } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || lock == PAGECACHE_LOCK_READ_UNLOCK) { - pagecache_remove_lock(block); + info_remove_lock(block); } -#endif break; case PAGECACHE_LOCK_READ: /* free -> read */ -#ifndef DBUG_OFF if (pin == PAGECACHE_PIN) { /* The cache is locked so nothing afraid off */ - pagecache_add_pin(block); + add_pin(block); } - pagecache_add_lock(block, 0); + info_add_lock(block, 0); break; -#endif case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ break; /* do nothing */ @@ -2343,6 +2251,16 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, BLOCK_INFO(block); DBUG_RETURN(0); +retry: + DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); + BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests != 0); + block->hash_link->requests--; + DBUG_ASSERT(block->requests != 0); + unreg_request(pagecache, block, 1); + BLOCK_INFO(block); + DBUG_RETURN(1); + } @@ -2355,6 +2273,8 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, pagecache pointer to a page cache data structure block block to which buffer the data is to be read primary <-> the current thread will read the data + validator validator of read from the disk data + validator_data pointer to the data need by the validator RETURN VALUE None @@ -2368,13 +2288,15 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, static void read_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, - my_bool primary) + my_bool primary, + pagecache_disk_read_validator validator, + gptr validator_data) { uint got_length; /* On entry cache_lock is locked */ - KEYCACHE_THREAD_TRACE("read_block"); + DBUG_ENTER("read_block"); if (primary) { /* @@ -2382,8 +2304,8 @@ static void read_block(PAGECACHE *pagecache, that submitted primary requests */ - KEYCACHE_DBUG_PRINT("read_block", - ("page to be read by primary request")); + DBUG_PRINT("read_block", + ("page to be read by primary request")); /* Page is not in buffer yet, is to be read from disk */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); @@ -2400,11 +2322,15 @@ static void read_block(PAGECACHE *pagecache, else block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); - KEYCACHE_DBUG_PRINT("read_block", - ("primary request: new page in cache")); + if (validator != NULL && + (*validator)(block->buffer, validator_data)) + block->status|= BLOCK_ERROR; + + DBUG_PRINT("read_block", + ("primary request: new page in cache")); /* Signal that all pending requests for this page now can be processed */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) - release_queue(&block->wqueue[COND_FOR_REQUESTED]); + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); } else { @@ -2412,17 +2338,17 @@ static void read_block(PAGECACHE *pagecache, This code is executed only by threads that submitted secondary requests */ - KEYCACHE_DBUG_PRINT("read_block", - ("secondary request waiting for new page to be read")); + DBUG_PRINT("read_block", + ("secondary request waiting for new page to be read")); { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; /* Put the request into a queue and wait until it can be processed */ - add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); do { - KEYCACHE_DBUG_PRINT("read_block: wait", - ("suspend thread %ld", thread->id)); + DBUG_PRINT("read_block: wait", + ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } @@ -2432,9 +2358,10 @@ static void read_block(PAGECACHE *pagecache, /* No parallel requests in single-threaded case */ #endif } - KEYCACHE_DBUG_PRINT("read_block", - ("secondary request: new page in cache")); + DBUG_PRINT("read_block", + ("secondary request: new page in cache")); } + DBUG_VOID_RETURN; } @@ -2454,11 +2381,11 @@ static void read_block(PAGECACHE *pagecache, void pagecache_unlock_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page) + LSN_PTR first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2471,24 +2398,6 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pin != PAGECACHE_PIN && lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE); - if (pin == PAGECACHE_PIN_LEFT_UNPINNED && - lock == PAGECACHE_LOCK_READ_UNLOCK) - { -#ifndef DBUG_OFF - if ( -#endif - /* block do not need here so we do not provide it */ - pagecache_make_lock_and_pin(pagecache, 0, lock, pin) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - DBUG_VOID_RETURN; - } pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* @@ -2498,7 +2407,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); if (stamp_this_page) @@ -2511,7 +2420,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, #ifndef DBUG_OFF if ( #endif - pagecache_make_lock_and_pin(pagecache, block, lock, pin) + make_lock_and_pin(pagecache, block, lock, pin) #ifndef DBUG_OFF ) { @@ -2549,7 +2458,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, void pagecache_unpin_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno) + pgcache_page_no_t pageno) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2565,7 +2474,7 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); #ifndef DBUG_OFF @@ -2576,9 +2485,9 @@ void pagecache_unpin_page(PAGECACHE *pagecache, a) we can't pin without any lock b) we can't unpin keeping write lock */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) #ifndef DBUG_OFF ) { @@ -2622,7 +2531,7 @@ void pagecache_unlock(PAGECACHE *pagecache, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page) + LSN_PTR first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unlock"); @@ -2643,7 +2552,7 @@ void pagecache_unlock(PAGECACHE *pagecache, if ( #endif /* block do not need here so we do not provide it */ - pagecache_make_lock_and_pin(pagecache, 0, lock, pin) + make_lock_and_pin(pagecache, 0, lock, pin) #ifndef DBUG_OFF ) { @@ -2673,7 +2582,7 @@ void pagecache_unlock(PAGECACHE *pagecache, #ifndef DBUG_OFF if ( #endif - pagecache_make_lock_and_pin(pagecache, block, lock, pin) + make_lock_and_pin(pagecache, block, lock, pin) #ifndef DBUG_OFF ) { @@ -2736,9 +2645,9 @@ void pagecache_unpin(PAGECACHE *pagecache, a) we can't pin without any lock b) we can't unpin keeping write lock */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) #ifndef DBUG_OFF ) { @@ -2767,7 +2676,7 @@ void pagecache_unpin(PAGECACHE *pagecache, Read a block of data from a cached file into a buffer; SYNOPSIS - pagecache_read() + pagecache_valid_read() pagecache pointer to a page cache data structure file handler for the file for the block of data to be read pageno number of the block of data in the file @@ -2776,16 +2685,12 @@ void pagecache_unpin(PAGECACHE *pagecache, type type of the page lock lock change link link to the page if we pin it + validator validator of read from the disk data + validator_data pointer to the data need by the validator RETURN VALUE Returns address from where the data is placed if sucessful, 0 - otherwise. - NOTES. - - The function ensures that a block of data of size length from file - positioned at pageno is in the buffers for some key cache blocks. - Then the function copies the data into the buffer buff. - Pin will be choosen according to lock parameter (see lock_to_pin) */ static enum pagecache_page_pin lock_to_pin[]= @@ -2800,19 +2705,21 @@ static enum pagecache_page_pin lock_to_pin[]= PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ }; -byte *pagecache_read(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - uint level, - byte *buff, - enum pagecache_page_type type, - enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link) +byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data) { int error= 0; enum pagecache_page_pin pin= lock_to_pin[lock]; PAGECACHE_PAGE_LINK fake_link; - DBUG_ENTER("page_cache_read"); + DBUG_ENTER("pagecache_valid_read"); DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", (uint) file->file, (ulong) pageno, level, page_cache_page_type_str[type], @@ -2829,7 +2736,7 @@ restart: if (pagecache->can_be_used) { /* Key cache is used */ - reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_BLOCK_LINK *block; uint status; int page_st; @@ -2842,28 +2749,32 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; - block= find_key_block(pagecache, file, pageno, level, - ((lock == PAGECACHE_LOCK_WRITE) ? 1 : 0), - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 0 : 1), - &page_st); + block= find_block(pagecache, file, pageno, level, + test(lock == PAGECACHE_LOCK_WRITE), + test((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)), + &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); block->type= type; - if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) - { - /* - We failed to write lock the block, cache is unlocked, and last write - lock is released, we will try to get the block again. - */ - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - goto restart; - } if (block->status != BLOCK_ERROR && page_st != PAGE_READ) { + DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); /* The requested page is to be read into the block buffer */ read_block(pagecache, block, - (my_bool)(page_st == PAGE_TO_BE_READ)); + (my_bool)(page_st == PAGE_TO_BE_READ), + validator, validator_data); + DBUG_PRINT("info", ("read is done")); + } + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to write lock the block, cache is unlocked, + we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; } if (! ((status= block->status) & BLOCK_ERROR)) @@ -2933,7 +2844,7 @@ no_key_cache: /* Key cache is not used */ */ my_bool pagecache_delete_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, my_bool flush) { @@ -2969,13 +2880,14 @@ restart: } block= link->block; DBUG_ASSERT(block != 0); - if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + if (make_lock_and_pin(pagecache, block, lock, pin)) { /* We failed to writelock the block, cache is unlocked, and last write lock is released, we will try to get the block again. */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); goto restart; } @@ -2983,7 +2895,7 @@ restart: { /* The block contains a dirty page - push it out of the cache */ - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* @@ -3015,9 +2927,10 @@ restart: } /* Cache is locked, so we can relese page before freeing it */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + link->requests--; if (pin == PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); free_block(pagecache, block); @@ -3053,11 +2966,12 @@ end: 0 if a success, 1 - otherwise. */ +/* description of how to change lock before and after write */ struct write_lock_change { - int need_lock_change; - enum pagecache_page_lock new_lock; - enum pagecache_page_lock unlock_lock; + int need_lock_change; /* need changing of lock at the end of write */ + enum pagecache_page_lock new_lock; /* lock at the beginning */ + enum pagecache_page_lock unlock_lock; /* lock at the end */ }; static struct write_lock_change write_lock_change_table[]= @@ -3084,10 +2998,11 @@ static struct write_lock_change write_lock_change_table[]= PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ }; +/* description of how to change pin before and after write */ struct write_pin_change { - enum pagecache_page_pin new_pin; - enum pagecache_page_pin unlock_pin; + enum pagecache_page_pin new_pin; /* pin status at the beginning */ + enum pagecache_page_pin unlock_pin; /* pin status at the end */ }; static struct write_pin_change write_pin_change_table[]= @@ -3104,7 +3019,7 @@ static struct write_pin_change write_pin_change_table[]= my_bool pagecache_write(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, uint level, byte *buff, enum pagecache_page_type type, @@ -3113,7 +3028,7 @@ my_bool pagecache_write(PAGECACHE *pagecache, enum pagecache_write_mode write_mode, PAGECACHE_PAGE_LINK *link) { - reg1 PAGECACHE_BLOCK_LINK *block; + reg1 PAGECACHE_BLOCK_LINK *block= NULL; PAGECACHE_PAGE_LINK fake_link; int error= 0; int need_lock_change= write_lock_change_table[lock].need_lock_change; @@ -3133,7 +3048,7 @@ my_bool pagecache_write(PAGECACHE *pagecache, if (write_mode == PAGECACHE_WRITE_NOW) { - /* we allow direct write if wwe do not use long term lockings */ + /* we allow direct write if we do not use long term lockings */ DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); /* Force writing from buff into disk */ pagecache->global_cache_write++; @@ -3167,10 +3082,10 @@ restart: lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && lock != PAGECACHE_LOCK_WRITE_UNLOCK && lock != PAGECACHE_LOCK_WRITE_TO_READ); - block= find_key_block(pagecache, file, pageno, level, - (need_wrlock ? 1 : 0), - (need_wrlock ? 1 : 0), - &page_st); + block= find_block(pagecache, file, pageno, level, + (need_wrlock ? 1 : 0), + (need_wrlock ? 1 : 0), + &page_st); } if (!block) { @@ -3186,24 +3101,25 @@ restart: block->type == type); block->type= type; - if (pagecache_make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].new_lock, - (need_lock_change ? - write_pin_change_table[pin].new_pin : - pin))) + if (make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? + write_pin_change_table[pin].new_pin : + pin))) { /* We failed to writelock the block, cache is unlocked, and last write lock is released, we will try to get the block again. */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); goto restart; } if (write_mode == PAGECACHE_WRITE_DONE) { - if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ) { /* Copy data from buff */ bmove512(block->buffer, buff, pagecache->block_size); @@ -3212,7 +3128,7 @@ restart: ("primary request: new page in cache")); /* Signal that all pending requests for this now can be processed. */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) - release_queue(&block->wqueue[COND_FOR_REQUESTED]); + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); } } else @@ -3220,7 +3136,8 @@ restart: if (write_mode == PAGECACHE_WRITE_NOW) { /* buff has been written to disk at start */ - if (block->status & BLOCK_CHANGED) + if ((block->status & BLOCK_CHANGED) && + !(block->status & BLOCK_ERROR)) link_to_file_list(pagecache, block, &block->hash_link->file, 1); } else @@ -3231,8 +3148,8 @@ restart: if (! (block->status & BLOCK_ERROR)) { bmove512(block->buffer, buff, pagecache->block_size); + block->status|= BLOCK_READ; } - block->status|= BLOCK_READ; } @@ -3242,9 +3159,9 @@ restart: int rc= #endif #warning we are doing an unlock here, so need to give the page its rec_lsn! - pagecache_make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].unlock_lock, - write_pin_change_table[pin].unlock_pin); + make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin); #ifndef DBUG_OFF DBUG_ASSERT(rc == 0); #endif @@ -3255,10 +3172,7 @@ restart: block->hash_link->requests--; if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) { - if (write_mode != PAGECACHE_WRITE_DONE) - { - unreg_request(pagecache, block, 1); - } + unreg_request(pagecache, block, 1); } else *link= (PAGECACHE_PAGE_LINK)block; @@ -3290,6 +3204,7 @@ end: DBUG_EXECUTE("exec", test_key_cache(pagecache, "end of key_cache_write", 1);); #endif + BLOCK_INFO(block); DBUG_RETURN(error); } @@ -3321,6 +3236,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) unlink_changed(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -3344,7 +3260,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* All pending requests for this page must be resubmitted. */ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); } @@ -3398,12 +3314,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache, } /* if the block is not pinned then it is not write locked */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); #ifndef DBUG_OFF { int rc= #endif - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); #ifndef DBUG_OFF DBUG_ASSERT(rc == 0); } @@ -3427,9 +3344,9 @@ static int flush_cached_blocks(PAGECACHE *pagecache, MYF(MY_NABP | MY_WAIT_IF_FULL)); pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); pagecache->global_cache_write++; if (error) @@ -3443,7 +3360,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache, It might happen only during an operation to resize the key cache. */ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); /* type will never be FLUSH_IGNORE_CHANGED here */ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) { @@ -3577,6 +3494,7 @@ restart: if ((error= flush_cached_blocks(pagecache, file, cache, end,type))) last_errno=error; + DBUG_PRINT("info", ("restarting...")); /* Restart the scan as some other thread might have changed the changed blocks chain: the blocks that were in switch @@ -3622,7 +3540,7 @@ removes a page from the list of dirty pages, while it's still dirty. A \ { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); do { KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", @@ -3761,7 +3679,7 @@ static int flush_all_key_blocks(PAGECACHE *pagecache) 0 on success (always because it can't fail) */ -int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +static int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) { DBUG_ENTER("reset_key_cache_counters"); if (!key_cache->inited) diff --git a/mysys/wqueue.c b/mysys/wqueue.c new file mode 100644 index 00000000000..28e044ff606 --- /dev/null +++ b/mysys/wqueue.c @@ -0,0 +1,167 @@ + +#include + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + wqueue_link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + wqueue_add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. +*/ + +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + wqueue_unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev= thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + wqueue_realease_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. +*/ + +void wqueue_release_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread= next; + pthread_cond_signal(&thread->suspend); + next= thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} + + +/* + Add thread and wait + + SYNOPSYS + wqueue_add_and_wait() + wqueue queue to add to + thread thread which is waiting + lock mutex need for the operation +*/ + +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, pthread_mutex_t *lock) +{ + DBUG_ENTER("wqueue_add_and_wait"); + DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx", + (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); + wqueue_add_to_queue(wqueue, thread); + do + { + DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx", + (ulong) &thread->suspend, (ulong) lock)); + pthread_cond_wait(&thread->suspend, lock); + DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx", + (ulong) &thread->suspend, (ulong) lock, + (ulong) thread->next)); + } + while (thread->next); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 24636f139ab..2aa9a8a36cb 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -110,7 +110,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ ma_ft_nlq_search.c ft_maria.c ma_sort.c \ ha_maria.cc trnman.c lockman.c tablockman.c \ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ - ma_sp_key.c ma_control_file.c + ma_sp_key.c ma_control_file.c ma_loghandler.c CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? SUFFIXES = .sh diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 9a99a721469..4b5ddd006c1 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -17,28 +17,10 @@ /* WL#3234 Maria control file First version written by Guilhem Bichot on 2006-04-27. - Does not compile yet. */ -#ifndef _control_file_h -#define _control_file_h - -/* - Not everybody needs to call the control file that's why control_file.h is - not in maria_def.h. However, policy or habit may want to change this. -*/ - -#ifndef REMOVE_WHEN_SANJA_PUSHES_LOG_HANDLER -/* - this is to get the control file to compile, until Sanja pushes the log - handler which will supersede those definitions. -*/ -typedef struct st_lsn { - uint32 file_no; - uint32 rec_offset; -} LSN; -#define maria_data_root "." -#endif +#ifndef _ma_control_file_h +#define _ma_control_file_h #define CONTROL_FILE_BASE_NAME "maria_control" /* diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c new file mode 100644 index 00000000000..d5c19e29ce2 --- /dev/null +++ b/storage/maria/ma_loghandler.c @@ -0,0 +1,5417 @@ +#include "maria_def.h" +#include + +/* number of opened log files in the pagecache (should be at lesst 2) */ +#define OPENED_FILES_NUM 3 + +/* records buffer size (should be LOG_PAGE_SIZE * n) */ +#define TRANSLOG_WRITE_BUFFER (1024*1024) +/* min chunk length */ +#define TRANSLOG_MIN_CHUNK 3 +/* + Number of buffers used by loghandler + + Should be at least 4, because one thread can block up to 2 buffers in + normal circumstances (less then half of one and full other, or just + switched one and other), But if we met end of the file in the middle and + have to switch buffer it will be 3. + 1 or 2 buffer for flushing/writing. +*/ +#define TRANSLOG_BUFFERS_NO 5 +/* number of bytes which is worth to be left on first page */ +#define TRANSLOG_MINCHUNK_CONTENT 1 +/* length of transaction log file name maria_log.XXXXXXXX*/ +#define TRANSLOG_FILE_NAME_LENGTH 18 +/* version of log file */ +#define TRANSLOG_VERSION_ID 10000 + +#define UNRECOVERABLE_ERROR(E) \ + do { \ + DBUG_PRINT("error", E); \ + printf E; \ + putchar('\n'); \ + } while(0); + + +/* record part descriptor */ +struct st_translog_part +{ + translog_size_t len; + uchar *buff; +}; + +/* record parts descriptor */ +struct st_translog_parts +{ + /* full record length */ + translog_size_t record_length; + /* full record length with chunk headers */ + translog_size_t total_record_length; + /* array of parts (st_translog_part) */ + DYNAMIC_ARRAY parts; + /* current part index */ + uint current; +}; + +/* log write buffer descriptor */ +struct st_translog_buffer +{ + LSN last_lsn; + /* This buffer offset in the file */ + TRANSLOG_ADDRESS offset; + /* + How much written (or will be written when copy_to_buffer_in_progress + become 0) to this buffer + */ + uint32 size; + /* This Buffer File */ + File file; + /* Threads which are waiting for buffer filling/freeing */ + WQUEUE waiting_filling_buffer; + /* Number of record which are in copy progress */ + int16 copy_to_buffer_in_progress; + /* list of waiting buffer ready threads */ + struct st_my_thread_var *waiting_flush; + /* lock for the buffer. Current buffer also lock the handler */ + pthread_mutex_t mutex; + struct st_translog_buffer *overlay; +#ifndef DBUG_OFF + struct st_my_thread_var *locked_by; + uint8 buffer_no; +#endif + /* IO cache for current log */ + uchar buffer[TRANSLOG_WRITE_BUFFER]; +}; + + +struct st_buffer_cursor +{ + /* pointer on the buffer */ + uchar *ptr; + /* current page fill */ + uint16 current_page_size; + /* how many times we finish this page to write it */ + uint16 write_counter; + /* previous write offset */ + uint16 previous_offset; + /* current buffer and its number */ + struct st_translog_buffer *buffer; + uint8 buffer_no; + my_bool chaser, protected; +}; + + +struct st_translog_descriptor +{ + /* *** Parameters of the log handler *** */ + + /* Directory to store files */ + char directory[FN_REFLEN]; + /* max size of one log size (for new logs creation) */ + uint32 log_file_max_size; + /* server version */ + uint32 server_version; + /* server ID */ + uint32 server_id; + /* Page cache for the log reads */ + PAGECACHE *pagecache; + /* Flags */ + uint flags; + /* Page overhead calculated by flags */ + uint16 page_overhead; + /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */ + uint16 page_capacity_chunk_2; + /* Loghandler's buffer capacity in case of chunk 2 filling */ + uint32 buffer_capacity_chunk_2; + /* Half of the buffer capacity in case of chunk 2 filling */ + uint32 half_buffer_capacity_chunk_2; + + /* *** Current state of the log handler *** */ + /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */ + File log_file_num[OPENED_FILES_NUM]; + /* buffers for log writing */ + struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO]; + /* + horizon - visible end of the log (here is absolute end of the log: + position where next chunk can start + */ + TRANSLOG_ADDRESS horizon; + /* horizon buffer cursor */ + struct st_buffer_cursor bc; + + /* Last flushed LSN */ + LSN flushed; + LSN sent_to_file; + pthread_mutex_t sent_to_file_lock; + File directory_fd; +}; + +static struct st_translog_descriptor log_descriptor; + +static uchar end_of_log= 0; + +/* record classes */ +enum record_class +{ + LOGRECTYPE_NOT_ALLOWED, + LOGRECTYPE_VARIABLE_LENGTH, + LOGRECTYPE_PSEUDOFIXEDLENGTH, + LOGRECTYPE_FIXEDLENGTH +}; + +/* chunk types */ +#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head + or tail */ +#define TRANSLOG_CHUNK_FIXED 0x40 /* 1 (pseudo)fixed record (also + LSN) */ +#define TRANSLOG_CHUNK_NOHDR 0x80 /* 2 no header chunk (till page + end) */ +#define TRANSLOG_CHUNK_LNGTH 0xC0 /* 3 chunk with chunk length */ +#define TRANSLOG_CHUNK_TYPE 0xC0 /* Mask to get chunk type */ +#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */ + +/* compressed (relative) LSN constants */ +#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN + length */ +#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed + LSN */ + +typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, + void *tcb, + struct st_translog_parts *parts); + +typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, + void *tcb, + LSN *lsn, + struct st_translog_parts *parts); + +typedef int16(*read_rec_hook) (enum translog_record_type type, + int16 read_length, uchar *read_buff, + uchar *decoded_buff); + +/* Descriptor of log record type */ +struct st_log_record_type_descriptor +{ + /* internal class of the record */ + enum record_class class; + /* length for fixed-size record, or maximum length of pseudo-fixed */ + uint16 fixed_length; + /* how much record body (belonged to headers too) read with headers */ + uint16 read_header_len; + /* HOOK for writing the record called before lock */ + prewrite_rec_hook prewrite_hook; + /* HOOK for writing the record called when LSN is known */ + inwrite_rec_hook inwrite_hook; + /* HOOK for reading headers */ + read_rec_hook read_hook; + /* + For pseudo fixed records number of compressed LSNs followed by + system header + */ + int16 compresed_LSN; +}; + +static struct st_log_record_type_descriptor + log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]= +{ + /*LOGREC_RESERVED_FOR_CHUNKS23= 0 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_HEAD= 1 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_TAIL= 2 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_BLOB= 3 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_BLOBS= 4 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_PURGE_ROW= 5 */ + {LOGRECTYPE_FIXEDLENGTH, 9, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_PURGE_BLOCKS= 6 */ + {LOGRECTYPE_FIXEDLENGTH, 10, 10, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_DELETE_ROW= 7 */ + {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_UPDATE_ROW_HEAD= 8 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INDEX= 9 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_UNDELETE_ROW= 10 */ + {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0}, + /*LOGREC_CLR_END= 11 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_PURGE_END= 12 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_ROW_INSERT= 13 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 14, 14, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_ROW_DELETE= 14 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 19, 19, NULL, NULL, NULL, 2}, + /*LOGREC_UNDO_ROW_UPDATE= 15 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 14, NULL, NULL, NULL, 2}, + /*LOGREC_UNDO_KEY_INSERT= 16 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_KEY_DELETE= 17 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, NULL, NULL, 2}, + /*LOGREC_PREPARE= 18 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_PREPARE_WITH_UNDO_PURGE= 19 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1}, + /*LOGREC_COMMIT= 20 */ + {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_COMMIT_WITH_UNDO_PURGE= 21 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_CHECKPOINT_PAGE= 22 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 6, NULL, NULL, NULL, 0}, + /*LOGREC_CHECKPOINT_TRAN= 23 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_CHECKPOINT_TABL= 24 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_CREATE_TABLE= 25 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_RENAME_TABLE= 26 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_DROP_TABLE= 27 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_TRUNCATE_TABLE= 28 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_FILE_ID= 29 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0}, + /*LOGREC_LONG_TRANSACTION_ID= 30 */ + {LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}, + /*31 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*32 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*33 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*34 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*35 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*36 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*37 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*38 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*39 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*40 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*41 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*42 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*43 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*44 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*45 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*46 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*47 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*48 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*49 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*50 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*51 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*52 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*53 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*54 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*55 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*56 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*57 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*58 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*59 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*60 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*61 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*62 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_RESERVED_FUTURE_EXTENSION= 63 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0} +}; + + +typedef struct st_translog_validator_data +{ + TRANSLOG_ADDRESS *addr; + my_bool was_recovered; +} TRANSLOG_VALIDATOR_DATA; + + +const char *maria_data_root; + + +/* + Get file name of the log by log number + + SYNOPSIS + translog_filename_by_fileno() + file_no Number of the log we want to open + path Pointer to buffer where file name will be + stored (must be FN_REFLEN bytes at least + RETURN + pointer to path +*/ + +char *translog_filename_by_fileno(uint32 file_no, char *path) +{ + char file_name[10 + 8 + 1]; + char *res; + DBUG_ENTER("translog_filename_by_fileno"); + my_sprintf(file_name, (file_name, "maria_log.%08u", file_no)); + res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME)); + DBUG_PRINT("info", ("Path '%s', path: 0x%lx, res: 0x%lx", + res, (ulong) path, (ulong) res)); + DBUG_RETURN(res); +} + + +/* + Open log file with given number without cache + + SYNOPSIS + open_logfile_by_number_no_cache() + file_no Number of the log we want to open + + RETURN + 0 error + file descriptor number +*/ + +File open_logfile_by_number_no_cache(uint32 file_no) +{ + File file; + char path[FN_REFLEN]; + DBUG_ENTER("open_logfile_by_number_no_cache"); + + if ((file= my_open(translog_filename_by_fileno(file_no, path), O_CREAT | O_BINARY | /* O_DIRECT + | + */ O_RDWR, + MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("File '%s', handler %d", path, file)); + DBUG_RETURN(file); +} + + +/* + Write log file page header in the just opened new log file + + SYNOPSIS + translog_write_file_header(); + + RETURN + 0 OK + 1 ERROR +*/ + +my_bool translog_write_file_header() +{ + ulonglong timestamp; + char page[TRANSLOG_PAGE_SIZE]; + DBUG_ENTER("translog_write_file_header"); + + /* file tag */ + strnmov(page, "MARIALOG", 8); + /* timestamp */ + timestamp= my_getsystime(); + int8store(page + 8, timestamp); + /* maria version */ + int4store(page + (8 + 8), TRANSLOG_VERSION_ID); + /* mysql version (MYSQL_VERSION_ID) */ + int4store(page + (8 + 8 + 4), log_descriptor.server_version); + /* server ID */ + int4store(page + (8 + 8 + 4 + 4), log_descriptor.server_id); + /* loghandler page size/512 */ + int2store(page + (8 + 8 + 4 + 4 + 4), TRANSLOG_PAGE_SIZE / 512); + /* file number */ + int3store(page + (8 + 8 + 4 + 4 + 4 + 2), log_descriptor.horizon.file_no); + + bzero(page + (8 + 8 + 4 + 4 + 4 + 2 + 3), + TRANSLOG_PAGE_SIZE - (8 + 8 + 4 + 4 + 4 + 2 + 3)); + + if (my_pwrite(log_descriptor.log_file_num[0], page, + TRANSLOG_PAGE_SIZE, 0, MYF(MY_WME)) != TRANSLOG_PAGE_SIZE) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + Initialize transaction log file buffer + + SYNOPSIS + translog_buffer_init() + buffer The buffer to initialize + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_buffer_init(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_init"); + /* This buffer offset */ + buffer->last_lsn.file_no= buffer->offset.file_no= 0; + buffer->last_lsn.rec_offset= buffer->offset.rec_offset= 0; + /* This Buffer File */ + buffer->file= 0; + buffer->overlay= 0; + /* IO cache for current log */ + bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER); + /* Buffer size */ + buffer->size= 0; + /* cond of thread which is waiting for buffer filling */ + buffer->waiting_filling_buffer.last_thread= 0; + /* Number of record which are in copy progress */ + buffer->copy_to_buffer_in_progress= 0; + /* list of waiting buffer ready threads */ + buffer->waiting_flush= 0; + /* lock for the buffer. Current buffer also lock the handler */ + if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("Init buffer #%u: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer)); + DBUG_RETURN(0); +} + + +/* + Close transaction log file by descriptor + + SYNOPSIS + translog_close_log_file() + file file descriptor + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_close_log_file(File file) +{ + PAGECACHE_FILE fl= + { + file + }; + flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE); + return test(my_close(file, MYF(MY_WME))); +} + + +/* + Create and fill header of new file + + SYNOPSIS + translog_create_new_file() + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_create_new_file() +{ + int i; + + DBUG_ENTER("translog_create_new_file"); + + if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] && + translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM - + 1])) + DBUG_RETURN(1); + for (i= OPENED_FILES_NUM - 1; i > 0; i--) + { + log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1]; + } + + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(log_descriptor.horizon.file_no)) <= 0 || + translog_write_file_header()) + DBUG_RETURN(1); + + if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + Lock the loghandler buffer + + SYNOPSIS + translog_buffer_lock() + buffer This buffer which should be locked + + RETURN + 0 - OK + 1 - Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_lock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_lock"); + DBUG_PRINT("enter", ("Lock buffer #%u (0x%lx): locked by:0x%lx, mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) &buffer->mutex)); + res= (pthread_mutex_lock(&buffer->mutex) != 0); +#ifndef DBUG_OFF + if (res == 0) + { + DBUG_ASSERT(buffer->locked_by == 0); + buffer->locked_by= my_thread_var; + } + else + DBUG_PRINT("error", ("Can't lock mutex 0x%lx (locked by0x%lx) errno: %d", + (ulong) &buffer->mutex, + (ulong) buffer->locked_by, res)); +#endif + DBUG_RETURN(res); +} +#else +#define translog_buffer_lock(B) \ + pthread_mutex_lock(&B->mutex); +#endif + + +/* + Unlock the loghandler buffer + + SYNOPSIS + translog_buffer_unlock() + buffer This buffer which should be unlocked + + RETURN + 0 - OK + 1 - Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_unlock"); + DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) :locked by:0x%lx (0x%lx)," + " mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) my_thread_var, + (ulong) &buffer->mutex)); + DBUG_ASSERT(buffer->locked_by == my_thread_var); + + buffer->locked_by= 0; + res= (pthread_mutex_unlock(&buffer->mutex) != 0); + DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx, mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + DBUG_RETURN(res); +} +#else +#define translog_buffer_unlock(B) \ + pthread_mutex_unlock(&B->mutex); +#endif + + +/* + Write page header. + + SYNOPSIS + translog_new_page_header() + horizon Where to write the page + cursor Where to write the page + + NOTE + - space for page header should be checked before +*/ + +static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + uchar *ptr; + + DBUG_ENTER("translog_new_page_header"); + DBUG_ASSERT(cursor->ptr !=NULL); + + cursor->protected= 0; + + ptr= cursor->ptr; + /* Page number */ + int3store(ptr, horizon->rec_offset / TRANSLOG_PAGE_SIZE); + ptr +=3; + /* File number */ + int3store(ptr, horizon->file_no); + ptr +=3; + *(ptr ++)= (uchar) log_descriptor.flags; + if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + { +#ifndef DBUG_OFF + DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)", + (ulong) horizon->file_no, (ulong) horizon->rec_offset)); + int4store(ptr, 0x11223344); +#endif + ptr +=4; /* CRC will be put when page + will be finished */ + } + if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + { + time_t tm; + int2store(ptr, time(&tm) & 0xFFFF); + ptr +=(TRANSLOG_PAGE_SIZE / 512) * 2; + } + { + uint len= (ptr -cursor->ptr); + horizon->rec_offset+= len; + cursor->current_page_size= len; + if (!cursor->chaser) + cursor->buffer->size+= len; + } + cursor->ptr= ptr; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr -cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_VOID_RETURN; +} + + +/* + Put sector protection on the page image + + SYNOPSIS + translog_put_sector_protection() + page reference on the page content + cursor cursor of the buffer +*/ + +static void translog_put_sector_protection(uchar *page, + struct st_buffer_cursor *cursor) +{ + uchar *table= page + log_descriptor.page_overhead - + (TRANSLOG_PAGE_SIZE / 512) * 2; + uint16 value= uint2korr(table) + cursor->write_counter; + uint16 last_protected_sector= (cursor->previous_offset - 1) / 512; + uint16 start_sector= cursor->previous_offset / 512; + uint i, offset; + + DBUG_ENTER("translog_put_sector_protection"); + if (start_sector == 0) + start_sector= 1; + + DBUG_PRINT("enter", ("Write counter %u, value %u, offset %u, " + "last protected %u, start sector %u", + (uint) cursor->write_counter, + (uint) value, + (uint) cursor->previous_offset, + (uint) last_protected_sector, (uint) start_sector)); + if (last_protected_sector == start_sector) + { + i= last_protected_sector * 2; + offset= last_protected_sector * 512; + /* restore data, because we modified sector which was protected */ + if (offset < cursor->previous_offset) + page[offset]= table[i]; + offset++; + if (offset < cursor->previous_offset) + page[offset]= table[i + 1]; + } + for (i= start_sector * 2, offset= start_sector * 512; + i < (TRANSLOG_PAGE_SIZE / 512) * 2; (i+= 2), (offset+= 512)) + { + DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + table[i]= page[offset]; + table[i + 1]= page[offset + 1]; + /**((uint16 *)(table + i))= *((uint16* )(page + offset));*/ + int2store(page + offset, value); + DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + } + DBUG_VOID_RETURN; +} + + +/* + Calculate adler CRC of given area + + SYNOPSIS + translog_adler_crc() + area Pointer of the area beginning + length The Area length + + RETURN + Adler CRC32 +*/ + +uint32 translog_adler_crc(uchar *area, uint length) +{ + uint32 a= 1, b= 0; +#define MOD_ADLER 65521 + + while (length) + { + uint tlen= length > 5550 ? 5550 : length; + length-= tlen; + do + { + a+= *area++; + b+= a; + } while (--tlen); + a= (a & 0xffff) + (a >> 16) * (65536 - MOD_ADLER); + b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER); + } + /* It can be shown that a <= 0x1013a here, so a single subtract will do. */ + if (a >= MOD_ADLER) + a-= MOD_ADLER; + /* It can be shown that b can reach 0xffef1 here. */ + b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER); + if (b >= MOD_ADLER) + b-= MOD_ADLER; + return (b << 16) | a; +} + + +/* + Finish current page with zeros + + SYNOPSIS + translog_finish_page() + horizon \ horizon & buffer pointers + cursor / +*/ + +static void translog_finish_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_size; + uchar *page= cursor->ptr -cursor->current_page_size; + DBUG_ENTER("translog_finish_page"); + + DBUG_PRINT("enter", ("Buffer #%u 0x%lx, " + "Buffer addr (%lu,0x%lx), " + "Page addr: (%lu,0x%lx), " + "size %lu (%lu), Pg: %u, left: %u", + (uint) cursor->buffer_no, (ulong) cursor->buffer, + (ulong) cursor->buffer->offset.file_no, + (ulong) cursor->buffer->offset.rec_offset, + (ulong) horizon->file_no, + (ulong) (horizon->rec_offset - + cursor->current_page_size), + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer), + (uint) cursor->current_page_size, (uint) left)); + DBUG_ASSERT(cursor->ptr !=NULL); + DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == + cursor->current_page_size % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(horizon->file_no == cursor->buffer->offset.file_no); + DBUG_ASSERT(cursor->buffer->offset.rec_offset + + (cursor->ptr -cursor->buffer->buffer) == horizon->rec_offset); + if (cursor->protected) + { + DBUG_PRINT("info", ("Already protected and finished")); + DBUG_VOID_RETURN; + } + if (left != TRANSLOG_PAGE_SIZE && left != 0) + { + DBUG_PRINT("info", ("left %u", (uint) left)); + bzero(cursor->ptr, left); + cursor->ptr +=left; + horizon->rec_offset+= left; + if (!cursor->chaser) + cursor->buffer->size+= left; + cursor->current_page_size= 0; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, " + "chaser: %d, Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, + (ulong) cursor->buffer, cursor->chaser, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(cursor->chaser + || ((ulong) (cursor->ptr -cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + } + if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(page, cursor); + DBUG_PRINT("info", ("drop write_counter")); + cursor->write_counter= 0; + cursor->previous_offset= 0; + } + if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); + int4store(page + 3 + 3 + 1, crc); + } + cursor->protected= 1; + DBUG_VOID_RETURN; +} + + +/* + Wait until all thread finish filling this buffer + + SYNOPSIS + translog_wait_for_writers() + buffer This buffer should be check + + NOTE + This buffer should be locked +*/ +static void translog_wait_for_writers(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread; + DBUG_ENTER("translog_wait_for_writers"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress)); + + if (!buffer->copy_to_buffer_in_progress) + DBUG_VOID_RETURN; + + thread= my_thread_var; + + DBUG_ASSERT(buffer->file != 0); + + do + { + DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " + "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + "mutex: 0x%lx", + thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) thread, + (ulong) &buffer->mutex)); +#ifndef DBUG_OFF + DBUG_ASSERT(buffer->locked_by == thread); + buffer->locked_by= 0; +#endif + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " + "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + "mutex: 0x%lx", + thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) thread, + (ulong) &buffer->mutex)); +#ifndef DBUG_OFF + DBUG_ASSERT(buffer->locked_by == 0); + buffer->locked_by= thread; +#endif + } while (buffer->copy_to_buffer_in_progress != 0); + + DBUG_VOID_RETURN; +} + + +/* + + Wait for this buffer become free + + SYNOPSIS + translog_wait_for_buffer_free() + buffer The buffer to initialize + + NOTE + - this buffer should be locked +*/ + +static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("translog_wait_for_buffer_free"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u size 0x%lu", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress, + (ulong) buffer->size)); + + translog_wait_for_writers(buffer); + + if (!buffer->file) + DBUG_VOID_RETURN; + + thread= my_thread_var; + + do + { + DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " + "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + "mutex: 0x%lx", + thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) thread, + (ulong) &buffer->mutex)); +#ifndef DBUG_OFF + DBUG_ASSERT(buffer->locked_by == thread); + buffer->locked_by= 0; +#endif + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " + "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + "mutex: 0x%lx", + thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, (ulong) thread, + (ulong) &buffer->mutex)); +#ifndef DBUG_OFF + DBUG_ASSERT(buffer->locked_by == 0); + buffer->locked_by= thread; +#endif + } while (buffer->copy_to_buffer_in_progress != 0); + DBUG_VOID_RETURN; +} + + +/* + Set cursor on the buffer beginning + + SYNOPSIS + translog_cursor_init() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ + +static void translog_cursor_init(struct st_buffer_cursor *cursor, + struct st_translog_buffer *buffer, + uint8 buffer_no) +{ + DBUG_ENTER("translog_cursor_init"); + cursor->ptr= buffer->buffer; + cursor->buffer= buffer; + cursor->buffer_no= buffer_no; + cursor->current_page_size= 0; + cursor->chaser= (cursor != &log_descriptor.bc); + DBUG_PRINT("info", ("drop write_counter")); + cursor->write_counter= 0; + cursor->previous_offset= 0; + cursor->protected= 0; + DBUG_VOID_RETURN; +} + + +/* + Initialize buffer for current file + + SYNOPSIS + translog_start_buffer() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ +static void translog_start_buffer(struct st_translog_buffer *buffer, + struct st_buffer_cursor *cursor, + uint8 buffer_no) +{ + DBUG_ENTER("translog_start_buffer"); + DBUG_PRINT("enter", + ("Assign buffer #%u (0x%lx) to file %u, offset 0x%lx(%lu)", + (uint) buffer->buffer_no, (ulong) buffer, + (uint) log_descriptor.log_file_num[0], + (ulong) log_descriptor.horizon.rec_offset, + (ulong) log_descriptor.horizon.rec_offset)); + DBUG_ASSERT(buffer_no == buffer->buffer_no); + buffer->last_lsn.file_no= 0; + buffer->last_lsn.rec_offset= 0; + buffer->offset= log_descriptor.horizon; + buffer->file= log_descriptor.log_file_num[0]; + buffer->overlay= 0; + buffer->size= 0; + translog_cursor_init(cursor, buffer, buffer_no); + DBUG_PRINT("info", ("init cursor #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr -cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_VOID_RETURN; +} + + +/* + Switch to the next buffer in a chain + + SYNOPSIS + translog_buffer_next() + horizon \ Pointers on current position in file and buffer + cursor / + next_file Also start new file + + NOTE: + - loghandler should be locked + - after return new and old buffer still are locked + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + my_bool new_file) +{ + uint8 old_buffer_no= cursor->buffer_no; + uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; + my_bool chasing= cursor->chaser; + DBUG_ENTER("translog_buffer_next"); + + DBUG_PRINT("info", ("horizon (%u,0x%lx), chasing: %d", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, chasing)); + + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0); + + translog_finish_page(horizon, cursor); + + if (!chasing) + { + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + } +#ifndef DBUG_OFF + else + DBUG_ASSERT(new_buffer->file != 0); +#endif + if (new_file) + { + horizon->file_no++; + horizon->rec_offset= TRANSLOG_PAGE_SIZE; /* header page */ + if (!chasing && translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + + /* prepare next page */ + if (chasing) + translog_cursor_init(cursor, new_buffer, new_buffer_no); + else + translog_start_buffer(new_buffer, cursor, new_buffer_no); + translog_new_page_header(horizon, cursor); + DBUG_RETURN(0); +} + + +/* + Set max LSN send to file + + SYNOPSIS + translog_set_sent_to_file() + lsn LSN to assign +*/ + +static void translog_set_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_set_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + DBUG_ASSERT(cmp_translog_addr(*lsn, log_descriptor.sent_to_file) >= 0); + log_descriptor.sent_to_file= *lsn; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get max LSN send to file + + SYNOPSIS + translog_get_sent_to_file() + lsn LSN to value +*/ + +static void translog_get_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_get_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + *lsn= log_descriptor.sent_to_file; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get first chunk address on the given page + + SYNOPSIS + translog_get_first_chunk_offset() + page The page where to find first chunk + + RETURN + first chunk offset + 0 - Error +*/ + +static my_bool translog_get_first_chunk_offset(uchar *page) +{ + uint16 page_header= 7; + DBUG_ENTER("translog_get_first_chunk_offset"); + + if (page[6] & TRANSLOG_PAGE_CRC) + { + page_header+= 4; + } + if (page[6] & TRANSLOG_SECTOR_PROTECTION) + { + page_header+= (TRANSLOG_PAGE_SIZE / 512) * 2; + } + DBUG_RETURN(page_header); +} + + +/* + Write coded length of record + + SYNOPSIS + translog_write_variable_record_1group_code_len + dst Destination buffer pointer + length Length which should be coded + header_len Calculated total header length +*/ + +static void +translog_write_variable_record_1group_code_len(uchar *dst, + translog_size_t length, + uint16 header_len) +{ + switch (header_len) { + case 6: /* (5 + 1) */ + DBUG_ASSERT(length <= 250); + *dst= (uint8) length; + return; + case 8: /* (5 + 3) */ + DBUG_ASSERT(length <= 0xFFFF); + *dst= 251; + int2store(dst + 1, length); + return; + case 9: /* (5 + 4) */ + DBUG_ASSERT(length <= 0xFFFFFF); + *dst= 252; + int3store(dst + 1, length); + return; + case 10: /* (5 + 5) */ + *dst= 253; + int4store(dst + 1, length); + return; + default: + DBUG_ASSERT(0); + } + return; +} + + +/* + Decode record data length and advance given pointer to the next field + + SYNOPSIS + translog_variable_record_1group_decode_len() + src The pointer to the pointer to the length beginning + + RETURN + decoded length +*/ + +static translog_size_t translog_variable_record_1group_decode_len(uchar **src) +{ + uint8 first= (uint8) (**src); + switch (first) { + case 251: + *src+= 3; + return (uint2korr((*src) - 2)); + case 252: + *src+= 4; + return (uint3korr((*src) - 3)); + case 253: + *src+= 5; + return (uint4korr((*src) - 4)); + case 254: + case 255: + DBUG_ASSERT(0); /* reserved for future use */ + return (0); + default: + (*src)++; + return (first); + } +} + + +/* + Get total length of this chunk (not only body) + + SYNOPSIS + translog_get_total_chunk_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + total length of the chunk + 0 - Error +*/ + +uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) +{ + DBUG_ENTER("translog_get_total_chunk_length"); + switch (page[offset] & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN + (head or tail) */ + { + translog_size_t rec_len; + uchar *start= page + offset; + uchar *ptr= start + 1 + 2; + uint16 chunk_len, header_len, page_rest; + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr -start) +2; + DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + DBUG_PRINT("info", ("chunk len: %u + %u = %u", + (uint) header_len, (uint) chunk_len, + (uint) (chunk_len + header_len))); + DBUG_RETURN(chunk_len + header_len); + } + page_rest= TRANSLOG_PAGE_SIZE - offset; + DBUG_PRINT("info", ("page_rest %u", (uint) page_rest)); + if (rec_len + header_len < page_rest) + DBUG_RETURN(rec_len + header_len); + DBUG_RETURN(page_rest); + break; + } + case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also + LSN) */ + { + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED")); + uint type= page[offset] & TRANSLOG_REC_TYPE; + DBUG_ASSERT(log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH || + log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH); + if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH) + { + DBUG_PRINT("info", + ("Fixed length: %u", + (uint) (log_record_type_descriptor[type].fixed_length + 3))); + DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3); + } + { + uchar *ptr= page + offset + 3; /* first compressed LSN */ + int i= 0; + uint length= log_record_type_descriptor[type].fixed_length + 3; + for (; i < log_record_type_descriptor[type].compresed_LSN; i++) + { + /* first 2 bits is length - 2 */ + uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2; + ptr+= len; + length-= (TRANSLOG_CLSN_MAX_LEN - len); /* subtract economized + bytes */ + } + DBUG_PRINT("info", ("Pseudo-fixed length: %u", length)); + DBUG_RETURN(length); + } + break; + } + case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page + end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR, length: %u", + (uint) (TRANSLOG_PAGE_SIZE - offset))); + DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset); + break; + case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH")); + DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3); + DBUG_PRINT("info", ("Length %u", uint2korr(page + offset + 1) + 3)); + DBUG_RETURN(uint2korr(page + offset + 1) + 3); + break; + default: + DBUG_ASSERT(0); + } +} + + +/* + Flush given buffer + + SYNOPSIS + translog_buffer_flush() + buffer This buffer should be flushed + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) +{ + uint32 i; + DBUG_ENTER("translog_buffer_flush"); + DBUG_PRINT("enter", + ("Buffer #%u 0x%lx: locked by 0x%lx (0x%lx), " + "file: %u, offset (%lu,0x%lx), size %lu", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) buffer->locked_by, my_thread_var, + (uint) buffer->file, + (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset, + (ulong) buffer->size)); + + DBUG_ASSERT(buffer->locked_by == my_thread_var); + DBUG_ASSERT(buffer->file != 0); + + translog_wait_for_writers(buffer); + if (buffer->overlay && buffer->overlay->file) + { + struct st_translog_buffer *overlay= buffer->overlay; + translog_buffer_unlock(buffer); + translog_buffer_lock(overlay); + translog_wait_for_buffer_free(overlay); + translog_buffer_unlock(overlay); + translog_buffer_lock(buffer); + } + + for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE) + { + PAGECACHE_FILE file= + { + buffer->file + }; + if (pagecache_write(log_descriptor.pagecache, + &file, + (buffer->offset.rec_offset + i) / TRANSLOG_PAGE_SIZE, + 3, + buffer->buffer + i, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0)) + { + UNRECOVERABLE_ERROR(("Cant't write page (%lu,0x%lx) to pagecacte", + (ulong) buffer->file, + (ulong) (buffer->offset.rec_offset + i))); + } + } + if (my_pwrite(buffer->file, (char*) buffer->buffer, + buffer->size, buffer->offset.rec_offset, + MYF(MY_WME)) != buffer->size) + { + UNRECOVERABLE_ERROR(("Cant't buffer (%lu,0x%lx) size %lu to the disk (%d)", + (ulong) buffer->file, + (ulong) buffer->offset.rec_offset, + (ulong) buffer->size, errno)); + DBUG_RETURN(1); + } + if (buffer->last_lsn.rec_offset != 0) /* if buffer->last_lsn is set */ + translog_set_sent_to_file(&buffer->last_lsn); + /* Free buffer */ + buffer->file= 0; + buffer->overlay= 0; + if (buffer->waiting_filling_buffer.last_thread != NULL) + { + wqueue_release_queue(&buffer->waiting_filling_buffer); + } + DBUG_RETURN(0); +} + + +/* + Recover page with sector protection (wipe out failed chunks) + + SYNOPSYS + translog_recover_page_up_to_sector() + page reference on the page + offset offset of failed sector + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) +{ + uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end; + DBUG_ENTER("translog_recover_page_up_to_sector"); + DBUG_PRINT("enter", ("offset %u, first chunk %u", + (uint) offset, (uint) chunk_offset)); + + if (chunk_offset == 0) + DBUG_RETURN(1); + + while (page[chunk_offset] != '\0' && chunk_offset < offset) + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + { + UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("chunk: offset: %u, length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE) + { + UNRECOVERABLE_ERROR(("demaged chunk (offset %u) in trusted area", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + chunk_offset+= chunk_length; + } + + valid_chunk_end= chunk_offset; + /*end of trusted area - sector parsing */ + while (page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + { + break; + } + DBUG_PRINT("info", ("chunk: offset: %u, length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > (uint) (offset + 512)) + { + break; + } + chunk_offset+= chunk_length; + valid_chunk_end= chunk_offset; + } + DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end)); + + bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end); + + DBUG_RETURN(0); +} + + +/* + Log page validator + + SYNOPSIS + translog_page_validator() + page_addr The page to check + data data, need for validation (address in this case) + + RETURN + 0 - OK + 1 - Error +*/ +static my_bool translog_page_validator(byte *page_addr, gptr data) +{ + uint8 flags; + uchar *page= (uchar*) page_addr; + DBUG_ENTER("translog_page_validator"); + TRANSLOG_ADDRESS *addr= ((TRANSLOG_VALIDATOR_DATA*) data)->addr; + + ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 0; + + if (uint3korr(page) != addr->rec_offset / TRANSLOG_PAGE_SIZE || + uint3korr(page + 3) != addr->file_no) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "page address written in the page is incorrect :" + "File %lu instead of %lu or page %lu instead of %lu", + (ulong) addr->file_no, (ulong) addr->rec_offset, + (ulong) uint3korr(page + 3), (ulong) addr->file_no, + (ulong) uint3korr(page), + (ulong) addr->rec_offset / TRANSLOG_PAGE_SIZE)); + DBUG_RETURN(1); + } + flags= page[3 + 3]; + if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "Garbage in the page flags field detected : %x", + (ulong) addr->file_no, (ulong) addr->rec_offset, + (uint) flags)); + DBUG_RETURN(1); + } + if (flags & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + if (crc != uint4korr(page + 3 + 3 + 1)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "CRC mismatch: calculated: %lx on the page %lx", + (ulong) addr->file_no, (ulong) addr->rec_offset, + (ulong) crc, (ulong) uint4korr(page + 3 + 3 + 1))); + DBUG_RETURN(1); + } + } + if (flags & TRANSLOG_SECTOR_PROTECTION) + { + uint i, offset; + uchar *table= (page + 3 + 3 + 1 + ((flags & TRANSLOG_PAGE_CRC) ? 4 : 0)); + uint16 current= uint2korr(table); + for (i= 2, offset= 512; + i < (TRANSLOG_PAGE_SIZE / 512) * 2; i+= 2, offset+= 512) + { + /* + TODO: add cunk counting for "suspecting" sectors (difference is + more that 1-2) + */ + uint16 test= uint2korr(page + offset); + DBUG_PRINT("info", ("sector #%u offset %u current %lx " + "read 0x%lx stored 0x%x%x", + i / 2, offset, current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + if (test < current) + { + if (0xFFFFLL - current + test > 512 / 3) + { + /* it is not overflow */ + if (translog_recover_page_up_to_sector(page, offset)) + DBUG_RETURN(1); + ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1; + DBUG_RETURN(0); + } + } + else if (test - current > 512 / 3) + { + if (translog_recover_page_up_to_sector(page, offset)) + DBUG_RETURN(1); + ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1; + DBUG_RETURN(0); + } + + /* Return value on the page */ + page[offset]= table[i]; + page[offset + 1]= table[i + 1]; + /**((uint16*)page + offset)= *((uint16*)(table + i));*/ + + current= test; + DBUG_PRINT("info", ("sector #%u offset %u current %lx " + "read 0x%lx stored 0x%x%x", + i / 2, offset, current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + } + } + DBUG_RETURN(0); +} + +/* + Get log page by file number and offset of the beginning of the page + + SYNOPSIS + translog_get_page() + data validator data, which contains the page address + buffer buffer for page placing + (might not be used in some cache implementations) + + RETURN + pointer to the page cache which should be used to read this page + NULL - Error +*/ + +uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) +{ + uint cache_index; + DBUG_ENTER("translog_get_page"); + DBUG_PRINT("enter", ("File %lu, Offset %lu(0x%lx)", + (ulong) data->addr->file_no, + (ulong) data->addr->rec_offset, + (ulong) data->addr->rec_offset)); + + /* it is really page address */ + DBUG_ASSERT(data->addr->rec_offset % TRANSLOG_PAGE_SIZE == 0); + + if ((cache_index= log_descriptor.horizon.file_no - data->addr->file_no) < + OPENED_FILES_NUM) + { + PAGECACHE_FILE file; + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == 0) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(data->addr->file_no)) == 0) + { + DBUG_RETURN(NULL); + } + } + file.file= log_descriptor.log_file_num[cache_index]; + + buffer= (uchar*) + pagecache_valid_read(log_descriptor.pagecache, &file, + data->addr->rec_offset / TRANSLOG_PAGE_SIZE, + 3, (char*) buffer, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0, + &translog_page_validator, (gptr) data); + } + else + { + File file= open_logfile_by_number_no_cache(data->addr->file_no); + if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE, + data->addr->rec_offset, MYF(MY_FNABP | MY_WME))) + buffer= NULL; + else if (translog_page_validator((byte*) buffer, (gptr) data)) + buffer= NULL; + my_close(file, MYF(MY_WME)); + } + DBUG_RETURN(buffer); +} + + +/* + Finds last page of the given log file + + SYNOPSIS + translog_get_last_page_addr() + addr address structure to fill with data, which contain + file number of the log file + last_page_ok assigned 1 if last page was OK + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, + my_bool *last_page_ok) +{ + MY_STAT stat_buff, *stat; + char path[FN_REFLEN]; + DBUG_ENTER("translog_get_last_page_addr"); + + if ((stat= my_stat (translog_filename_by_fileno(addr->file_no, + path), + &stat_buff, MYF(MY_WME))) == NULL) + DBUG_RETURN(1); + DBUG_PRINT("info", ("File size %lu", (ulong) stat->st_size)); + if (stat->st_size > TRANSLOG_PAGE_SIZE) + { + addr->rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * + TRANSLOG_PAGE_SIZE); + *last_page_ok= (stat->st_size == addr->rec_offset + TRANSLOG_PAGE_SIZE); + } + else + { + *last_page_ok= 0; + addr->rec_offset= 0; + } + DBUG_PRINT("info", ("Last page: 0x%lx, ok %d", (ulong) addr->rec_offset, + *last_page_ok)); + DBUG_RETURN(0); +} + + +/* + Get number bytes for record length storing + + SYNOPSIS + translog_variable_record_length_bytes() + length Record length wich will be codded + + RETURN + 1,3,4,5 - number of bytes to store given length +*/ +static uint translog_variable_record_length_bytes(translog_size_t length) +{ + if (length < 250) + return 1; + else if (length < 0xFFFF) + return 3; + else if (length < 0xFFFFFF) + return 4; + return 5; +} + + +/* + Get header of this chunk + + SYNOPSIS + translog_get_chunk_header_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + total length of the chunk + 0 - Error +*/ + +uint16 translog_get_chunk_header_length(uchar *page, uint16 offset) +{ + DBUG_ENTER("translog_get_chunk_header_length"); + switch (page[offset] & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN + (head or tail) */ + { + translog_size_t rec_len; + uchar *start= page + offset; + uchar *ptr= start + 1 + 2; + uint16 chunk_len, header_len; + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr -start) +2; + DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + /*TODO: fine header end */ + DBUG_ASSERT(0); + } + DBUG_RETURN(header_len); + break; + } + case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also + LSN) */ + { + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3")); + DBUG_RETURN(3); + } + case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page + end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1")); + DBUG_RETURN(1); + break; + case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3")); + DBUG_RETURN(3); + break; + default: + DBUG_ASSERT(0); + } +} + + +/* + Initialize transaction log + + SYNOPSIS + translog_init() + directory Directory where log files are put + log_file_max_size max size of one log size (for new logs creation) + server_version version of MySQL server (MYSQL_VERSION_ID) + server_id server ID (replication & Co) + pagecache Page cache for the log reads + flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION + TRANSLOG_RECORD_CRC) + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_init(const char *directory, + uint32 log_file_max_size, + uint32 server_version, + uint32 server_id, PAGECACHE *pagecache, uint flags) +{ + int i; + int old_log_was_recovered= 0, logs_found= 0; + TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; + DBUG_ENTER("translog_init"); + + + if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + + /* Directory to store files */ + unpack_dirname(log_descriptor.directory, directory); + + if ((log_descriptor.directory_fd= my_open(log_descriptor.directory, + O_RDONLY, MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'", + errno, log_descriptor.directory)); + DBUG_RETURN(1); + } + + /* max size of one log size (for new logs creation) */ + log_descriptor.log_file_max_size= + log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE); + /* server version */ + log_descriptor.server_version= server_version; + /* server ID */ + log_descriptor.server_id= server_id; + /* Page cache for the log reads */ + log_descriptor.pagecache= pagecache; + /* Flags */ + DBUG_ASSERT((flags & + ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) == 0); + log_descriptor.flags= flags; + log_descriptor.page_overhead= 7; + if (flags & TRANSLOG_PAGE_CRC) + log_descriptor.page_overhead+= 4; + if (flags & TRANSLOG_SECTOR_PROTECTION) + log_descriptor.page_overhead+= (TRANSLOG_PAGE_SIZE / 512) * 2; + log_descriptor.page_capacity_chunk_2= + TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1; + DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0); + log_descriptor.buffer_capacity_chunk_2= + (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) * + log_descriptor.page_capacity_chunk_2; + log_descriptor.half_buffer_capacity_chunk_2= + log_descriptor.buffer_capacity_chunk_2 / 2; + DBUG_PRINT("info", + ("Overhead: %u, pc2: %u, bc2: %u, bc2/2: %u", + log_descriptor.page_overhead, + log_descriptor.page_capacity_chunk_2, + log_descriptor.buffer_capacity_chunk_2, + log_descriptor.half_buffer_capacity_chunk_2)); + + /* *** Current state of the log handler *** */ + + /* Init log handler file handlers cache */ + for (i= 0; i < OPENED_FILES_NUM; i++) + { + log_descriptor.log_file_num[i]= 0; + } + + /* just to init it somehow */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + + /* Buffers for log writing */ + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { +#ifndef DBUG_OFF + log_descriptor.buffers[i].buffer_no= (uint8) i; + log_descriptor.buffers[i].locked_by= NULL; +#endif + if (translog_buffer_init(log_descriptor.buffers + i)) + DBUG_RETURN(1); + } + + logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); + + if (logs_found) + { + my_bool pageok; + /* + TODO: scan directory for maria_log.XXXXXXXX files and find + highest XXXXXXXX & set logs_found + */ + + /* TODO: check that last checkpoint within present log addresses space */ + /* find the log end */ + if (last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + DBUG_ASSERT(last_checkpoint_lsn.rec_offset == 0); + /* there was no checkpoints we will read from the beginning */ + sure_page.file_no= 1; + sure_page.rec_offset= TRANSLOG_PAGE_SIZE; + } + else + { + sure_page= last_checkpoint_lsn; + DBUG_ASSERT(sure_page.rec_offset % TRANSLOG_PAGE_SIZE != 0); + sure_page.rec_offset-= sure_page.rec_offset % TRANSLOG_PAGE_SIZE; + } + log_descriptor.horizon.file_no= last_page.file_no= last_logno; + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + if (last_page.rec_offset == 0) + { + if (last_page.file_no == 1) + { + logs_found= 0; /* file #1 has no pages */ + } + else + { + last_page.file_no--; + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + } + } + } + if (logs_found) + { + TRANSLOG_ADDRESS current_page= sure_page; + my_bool pageok; + + DBUG_ASSERT(sure_page.file_no < last_page.file_no || + (sure_page.file_no == last_page.file_no && + sure_page.rec_offset <= last_page.rec_offset)); + + /* TODO: check page size */ + + last_valid_page.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; + last_valid_page.rec_offset= 0; + /* scan and validate pages */ + do + { + TRANSLOG_ADDRESS current_file_last_page; + current_file_last_page.file_no= current_page.file_no; + if (translog_get_last_page_addr(¤t_file_last_page, &pageok)) + DBUG_RETURN(1); + if (!pageok) + { + DBUG_PRINT("error", ("File %u have no complete last page", + (uint) current_file_last_page.file_no)); + old_log_was_recovered= 1; + /* This file is not written till the end so it should be last */ + last_page= current_file_last_page; + /* TODO: issue warning */ + } + do + { + TRANSLOG_VALIDATOR_DATA data= + { + ¤t_page, 0 + }; + uchar buffer[TRANSLOG_PAGE_SIZE], *page; + if ((page= translog_get_page(&data, buffer)) == NULL) + DBUG_RETURN(1); + if (data.was_recovered) + { + DBUG_PRINT("error", ("file no %u (%d), rec_offset 0x%lx (%lu) (%d)", + (uint) current_page.file_no, + (uint3korr(page + 3) != current_page.file_no), + (ulong) current_page.rec_offset, + (ulong) (current_page.rec_offset / + TRANSLOG_PAGE_SIZE), + (uint3korr(page) != + current_page.rec_offset / TRANSLOG_PAGE_SIZE))); + old_log_was_recovered= 1; + break; + } + last_valid_page= current_page; + current_page.rec_offset+= TRANSLOG_PAGE_SIZE; + } while (current_page.rec_offset <= current_file_last_page.rec_offset); + current_page.file_no++; + current_page.rec_offset= TRANSLOG_PAGE_SIZE; + } while (current_page.file_no <= last_page.file_no && + !old_log_was_recovered); + if (last_valid_page.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + DBUG_ASSERT(last_valid_page.rec_offset == 0); + + /* Panic!!! Even page which should be valid is invalid */ + /* TODO: issue error */ + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Last valid page is in file %lu offset %lu (0x%lx), " + "Logs found: %d, was recovered: %d", + (ulong) last_valid_page.file_no, + (ulong) last_valid_page.rec_offset, + (ulong) last_valid_page.rec_offset, + logs_found, old_log_was_recovered)); + + /* TODO: check server ID */ + if (logs_found && !old_log_was_recovered) + { + TRANSLOG_VALIDATOR_DATA data= + { + &last_valid_page, 0 + }; + uchar buffer[TRANSLOG_PAGE_SIZE], *page; + uint16 chunk_offset; + /* continue old log */ + DBUG_ASSERT(last_valid_page.file_no == log_descriptor.horizon.file_no); + if ((page= translog_get_page(&data, + buffer)) == NULL || + (chunk_offset= translog_get_first_chunk_offset(page)) == 0) + DBUG_RETURN(1); + + /* Puts filled part of old page in the buffer */ + log_descriptor.horizon= last_valid_page; + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + /* + Free space if filled with 0 and first byte of + real chunk can't be 0 + */ + while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + DBUG_RETURN(1); + DBUG_PRINT("info", ("chunk: offset: %u, length %u", + (uint) chunk_offset, (uint) chunk_length)); + chunk_offset+= chunk_length; + + /* chunk can't cross the page border */ + DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE); + } + memmove(log_descriptor.buffers->buffer, page, chunk_offset); + log_descriptor.bc.buffer->size+= chunk_offset; + log_descriptor.bc.ptr+= chunk_offset; + log_descriptor.bc.current_page_size= chunk_offset; + log_descriptor.horizon.rec_offset= + chunk_offset + last_valid_page.rec_offset; + DBUG_PRINT("info", ("Move Page #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer))); + DBUG_ASSERT(log_descriptor.bc.chaser + || + ((ulong) + (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == + log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + } + } + DBUG_PRINT("info", ("Logs found: %d, was recovered %d", + logs_found, old_log_was_recovered)); + if (!logs_found) + { + /* Start new log system from scratch */ + /* Current log number */ + log_descriptor.horizon.file_no= 1; + /* Used space */ + log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; // header page + /* Current logs file number in page cache */ + log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(log_descriptor.horizon.file_no); + if (translog_write_file_header()) + DBUG_RETURN(1); + if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + /* assign buffer 0 */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + else if (old_log_was_recovered) + { + int buffer_touched= log_descriptor.bc.buffer->file; + if (buffer_touched) + { + struct st_translog_buffer *buffer= log_descriptor.bc.buffer; + /* + We are in initialization so we can use translog_buffer_lock instead + of translog_lock, because there is no other threads which can lock + the loghandler. + */ + if (translog_buffer_lock(buffer) || + translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, + 1) || + translog_buffer_unlock(log_descriptor.bc.buffer) || + translog_buffer_flush(buffer) || translog_buffer_unlock(buffer)) + DBUG_RETURN(1); + } + else + { + log_descriptor.horizon.file_no++; /* leave the demaged file + untouched */ + log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; /* header page */ + if (translog_create_new_file()) + DBUG_RETURN(1); + /* + Buffer system left untouched after recovery => we should init it + (starting from buffer 0) + */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + } + + /* all LSNs that are on disk are flushed */ + log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon; + log_descriptor.flushed.rec_offset--; + log_descriptor.sent_to_file.rec_offset--; + + DBUG_RETURN(0); +} + + +/* + Free transaction log file buffer + + SYNOPSIS + translog_buffer_destroy() + buffer_no The buffer to free + + NOTE + This buffer should be locked; +*/ + +static void translog_buffer_destroy(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_destroy"); + DBUG_PRINT("enter", + ("Buffer #%u: 0x%lx, file: %u, offset (%u,0x%lx), size %lu", + (uint) buffer->buffer_no, (ulong) buffer, + (uint) buffer->file, + (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset, + (ulong) buffer->size)); + DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0); + if (buffer->file) + { + /* + We ignore error here, because we can't do something about it + (it is shutting down) + */ + translog_buffer_flush(buffer); + } + DBUG_PRINT("info", ("Unlock mutex 0x%lx", (ulong) &buffer->mutex)); + pthread_mutex_unlock(&buffer->mutex); + DBUG_PRINT("info", ("Destroy mutex 0x%lx", (ulong) &buffer->mutex)); + pthread_mutex_destroy(&buffer->mutex); + DBUG_VOID_RETURN; +} + + +/* + Free log handler resources + + SYNOPSIS + translog_destroy() +*/ + +void translog_destroy() +{ + int i; + DBUG_ENTER("translog_destroy"); + if (log_descriptor.bc.buffer->file != 0) + translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc); + + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { + struct st_translog_buffer *buffer= log_descriptor.buffers + i; + translog_buffer_lock(buffer); + translog_buffer_destroy(buffer); + } + /* close files */ + for (i= 0; i < OPENED_FILES_NUM; i++) + { + if (log_descriptor.log_file_num[i]) + translog_close_log_file(log_descriptor.log_file_num[i]); + } + pthread_mutex_destroy(&log_descriptor.sent_to_file_lock); + my_close(log_descriptor.directory_fd, MYF(MY_WME)); + DBUG_VOID_RETURN; +} + + +/* + Lock the loghandler + + SYNOPSIS + translog_lock() + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_lock() +{ + struct st_translog_buffer *current_buffer; + DBUG_ENTER("translog_lock"); + + /* + locking the loghandler mean locking current buffer, but it can change + during locking, so we should check it + */ + for (;;) + { + current_buffer= log_descriptor.bc.buffer; + if (translog_buffer_lock(current_buffer)) + DBUG_RETURN(1); + if (log_descriptor.bc.buffer == current_buffer) + break; + translog_buffer_unlock(current_buffer); + } + DBUG_RETURN(0); +} + + +/* + Unlock the loghandler + + SYNOPSIS + translog_unlock() + + RETURN + 0 - OK + 1 - Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_unlock() +{ + DBUG_ENTER("translog_unlock"); + translog_buffer_unlock(log_descriptor.bc.buffer); + + DBUG_RETURN(0); +} +#else +#define translog_unlock() \ + translog_buffer_unlock(log_descriptor.bc.buffer); +#endif + +/* + Start new page + + SYNOPSIS + translog_page_next() + horizon \ Position in file and buffer where we are + cursor / + prev_buffer Buffer which should be flushed will be assigned + here if it is need + + NOTE + handler should be locked + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + struct st_translog_buffer **prev_buffer) +{ + struct st_translog_buffer *buffer= cursor->buffer; + DBUG_ENTER("translog_page_next"); + + if ((cursor->ptr +TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) || + (horizon->rec_offset + TRANSLOG_PAGE_SIZE > + log_descriptor.log_file_max_size)) + { + DBUG_PRINT("info", ("Switch to next buffer, Buffer Size %lu (%lu) => %d, " + "File size %lu max %lu => %d", + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer), + (cursor->ptr +TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER), + (ulong) horizon->rec_offset, + (ulong) log_descriptor.log_file_max_size, + (horizon->rec_offset + TRANSLOG_PAGE_SIZE > + log_descriptor.log_file_max_size))); + if (translog_buffer_next(horizon, cursor, + (horizon->rec_offset + + TRANSLOG_PAGE_SIZE) > + log_descriptor.log_file_max_size)) + DBUG_RETURN(1); + *prev_buffer= buffer; + DBUG_PRINT("info", ("Buffer #%u (0x%lu) have to be flushed", + (uint) buffer->buffer_no, (ulong) buffer)); + } + else + { + DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu), " + "Buffer Size %lu (%lu)", + (uint) buffer->buffer_no, + (ulong) buffer, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + translog_finish_page(horizon, cursor); + translog_new_page_header(horizon, cursor); + *prev_buffer= NULL; + } + DBUG_RETURN(0); +} + + +/* + Write data of given length to the current page + + SYNOPSIS + translog_write_data_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + buffer buffer with data + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, uchar *buffer) +{ + DBUG_ENTER("translog_write_data_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", + (ulong) length, (uint) cursor->current_page_size)); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + memmove(cursor->ptr, buffer, length); + cursor->ptr+= length; + horizon->rec_offset+= length; + cursor->current_page_size+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx," + "chaser: %d, Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr -cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + + DBUG_RETURN(0); +} + + +/* + Write data from parts of given length to the current page + + SYNOPSIS + translog_write_parts_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + parts IN/OUT chunk source + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + struct st_translog_parts *parts) +{ + translog_size_t left= length; + uint cur= (uint) parts->current; + DBUG_ENTER("translog_write_parts_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu, parts %u of %u. Page size %u, " + "Buffer size: %lu (%lu)", + (ulong) length, + (uint) (cur + 1), (uint) parts->parts.elements, + (uint) cursor->current_page_size, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + do + { + translog_size_t len; + struct st_translog_part part; + uchar *buff; + + DBUG_ASSERT(cur < parts->parts.elements); + get_dynamic(&parts->parts, (gptr) &part, cur); + buff= part.buff; + DBUG_PRINT("info", ("Part %u, Length: %lu, left: %lu", + (uint) (cur + 1), (ulong) part.len, (ulong) left)); + + if (part.len > left) + { + /* we should write less then the current part */ + len= left; + part.len-= len; + part.buff+= len; + if (set_dynamic(&parts->parts, (gptr) &part, cur)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("Set new part %u, Length: %lu", + (uint) (cur + 1), (ulong) part.len)); + } + else + { + len= part.len; + cur++; + DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len)); + } + DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u", + (ulong) cursor->ptr, (ulong)buff, (uint)len)); + memmove(cursor->ptr, buff, len); + left-= len; + cursor->ptr+= len; + } while (left); + + parts->current= cur; + horizon->rec_offset+= length; + cursor->current_page_size+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx, " + "chaser: %d, Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer))); + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr -cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == + cursor->current_page_size % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + + DBUG_RETURN(0); +} + + +/* + Put 1 group chunk type 0 header into parts array + + SYNOPSIS + translog_write_variable_record_1group_header() + parts Descriptor of record source parts + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + header_length Calculated header length of chunk type 0 + chunk0_header Buffer for the chunk header writing +*/ + +static void +translog_write_variable_record_1group_header(struct st_translog_parts *parts, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + uint16 header_length, + uchar *chunk0_header) +{ + struct st_translog_part part; + DBUG_ASSERT(parts->current != 0); /* first part is left for + header */ + parts->total_record_length+= (part.len= header_length); + part.buff= chunk0_header; + *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + int2store(chunk0_header + header_length - 2, 0); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); +} + + +/* + Increase number of writers for this buffer + + SYNOPSIS + translog_buffer_increase_writers() + buffer target buffer +*/ + +#ifndef DBUG_OFF +static void translog_buffer_increase_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_increase_writers"); + buffer->copy_to_buffer_in_progress++; + DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + DBUG_VOID_RETURN; +} +#else +#define translog_buffer_increase_writers(B) \ + (B)->copy_to_buffer_in_progress++; +#endif + + +/* + Decrease number of writers for this buffer + + SYNOPSIS + translog_buffer_decrease_writers() + buffer target buffer +*/ + + +static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_decrease_writers"); + buffer->copy_to_buffer_in_progress--; + DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + if (buffer->copy_to_buffer_in_progress == 0 && + buffer->waiting_filling_buffer.last_thread != NULL) + { + wqueue_release_queue(&buffer->waiting_filling_buffer); + } + DBUG_VOID_RETURN; +} + + +/* + Put chunk 2 from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk2_page() + parts Descriptor of record source parts + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool +translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush= 0; + int rc; + uchar chunk2_header[1]= + { + TRANSLOG_CHUNK_NOHDR + }; + + DBUG_ENTER("translog_write_variable_record_chunk2_page"); + + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + + translog_write_data_on_page(horizon, cursor, 1, chunk2_header); + translog_write_parts_on_page(horizon, cursor, + log_descriptor.page_capacity_chunk_2, parts); + DBUG_RETURN(0); +} + + +/* + Put chunk 3 of requested length in the buffer from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk3_page() + parts Descriptor of record source parts + length Length of this chunk + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool +translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, + uint16 length, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush= 0; + struct st_translog_part part; + int rc; + uchar chunk3_header[1 + 2]; + + DBUG_ENTER("translog_write_variable_record_chunk3_page"); + + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + if (length == 0) + { + /* It was call to write page header only (no data for chunk 3) */ + DBUG_PRINT("info", ("It is a call to make page header only")); + DBUG_RETURN(0); + } + + DBUG_ASSERT(parts->current != 0); /* first part is left for + header */ + parts->total_record_length+= (part.len= 1 + 2); + part.buff= chunk3_header; + *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH); + int2store(chunk3_header + 1, length); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); + + translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts); + DBUG_RETURN(0); +} + +/* + Move log pointer (horizon) on given number pages starting from next page, + and given offset on the last page + + SYNOPSIS + translog_advance_pointer() + pages Number of full pages starting from the next one + last_page_data Plus this data on the last page + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) +{ + translog_size_t last_page_offset= + log_descriptor.page_overhead + last_page_data; + translog_size_t offset= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size /* next + page + */ + + pages * TRANSLOG_PAGE_SIZE + last_page_offset; + translog_size_t buffer_end_offset, file_end_offset, min_offset; + DBUG_ENTER("translog_advance_pointer"); + DBUG_PRINT("enter", ("Pointer: (%u, 0x%lx) + %u + %u pages + %u + %u", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_size), + pages, (uint) log_descriptor.page_overhead, + (uint) last_page_data)); + + for (;;) + { + uint8 new_buffer_no= + (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer; + struct st_translog_buffer *old_buffer; + buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size; + file_end_offset= + log_descriptor.log_file_max_size - log_descriptor.horizon.rec_offset; + DBUG_PRINT("info", ("offset: %lu, buffer_end_offs: %lu, " + "file_end_offs: %lu", + (ulong) offset, (ulong) buffer_end_offset, + (ulong) file_end_offset)); + DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = " + "0x%lx (0x%lx)", + (uint) log_descriptor.bc.buffer->buffer_no, + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) log_descriptor.bc.buffer->offset.rec_offset, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.buffer->offset.rec_offset + + log_descriptor.bc.buffer->size), + (ulong) log_descriptor.horizon.rec_offset)); + DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset + + log_descriptor.bc.buffer->size == + log_descriptor.horizon.rec_offset); + + if (offset <= buffer_end_offset && offset <= file_end_offset) + break; + old_buffer= log_descriptor.bc.buffer; + new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO; + new_buffer= log_descriptor.buffers + new_buffer_no; + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + min_offset= (buffer_end_offset < file_end_offset ? + buffer_end_offset : file_end_offset); + log_descriptor.bc.buffer->size+= min_offset; + log_descriptor.bc.ptr +=min_offset; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer))); + DBUG_ASSERT((ulong) + (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == + log_descriptor.bc.buffer->size); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + translog_buffer_increase_writers(log_descriptor.bc.buffer); + + if (file_end_offset <= buffer_end_offset) + { + log_descriptor.horizon.file_no++; + log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; + DBUG_PRINT("info", ("New file %d", log_descriptor.horizon.file_no)); + if (translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("info", ("The same file")); + log_descriptor.horizon.rec_offset+= min_offset; + } + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + if (translog_buffer_unlock(old_buffer)) + { + DBUG_RETURN(1); + } + offset-= min_offset; + } + log_descriptor.bc.ptr+= offset; + log_descriptor.bc.buffer->size+= offset; + translog_buffer_increase_writers(log_descriptor.bc.buffer); + log_descriptor.horizon.rec_offset+= offset; + log_descriptor.bc.current_page_size= last_page_offset; + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu), " + "offset: %u last page: %u", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer-> + buffer), (uint) offset, + (uint) last_page_offset)); + DBUG_ASSERT(log_descriptor.bc.chaser + || + ((ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) + == log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + DBUG_PRINT("info", + ("pointer moved to: (%u, 0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset)); + DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer-> + buffer) %TRANSLOG_PAGE_SIZE == + log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + log_descriptor.bc.protected= 0; + DBUG_RETURN(0); +} + + + +/* + Get page rest + + SYNOPSIS + translog_get_current_page_rest() + + NOTE loghandler should be locked + + RETURN + number of bytes left on the current page +*/ + +#define translog_get_current_page_rest() \ + (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size) + +/* + Get buffer rest in full pages + + SYNOPSIS + translog_get_current_buffer_rest() + + NOTE loghandler should be locked + + RETURN + number of full pages left on the current buffer +*/ + +#define translog_get_current_buffer_rest() \ + ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER - \ + log_descriptor.bc.ptr) / \ + TRANSLOG_PAGE_SIZE) + +/* + Calculate possible group size without first (current) page + + SYNOPSIS + translog_get_current_group_size() + + NOTE loghandler should be locked + + RETURN + group size without first (current) page +*/ + +static translog_size_t translog_get_current_group_size() +{ + /* buffer rest in full pages */ + translog_size_t buffer_rest= translog_get_current_buffer_rest(); + + DBUG_ENTER("translog_get_current_group_size"); + + DBUG_PRINT("info", ("buffer_rest in pages %lu", buffer_rest)); + buffer_rest*= log_descriptor.page_capacity_chunk_2; + /* in case of only half of buffer free we can write this and next buffer */ + if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2) + { + DBUG_PRINT("info", ("buffer_rest %lu -> add %lu", + buffer_rest, + (ulong) log_descriptor.buffer_capacity_chunk_2)); + buffer_rest+= log_descriptor.buffer_capacity_chunk_2; + } + + DBUG_PRINT("info", ("buffer_rest %lu", buffer_rest)); + + DBUG_RETURN(buffer_rest); +} + + +/* + Write variable record in 1 group + + SYNOPSIS + translog_write_variable_record_1group() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool +translog_write_variable_record_1group(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + void *tcb) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i; + translog_size_t record_rest, full_pages, first_page; + uint additional_chunk3_page= 0; + uchar chunk0_header[1 + 2 + 5 + 2]; + + DBUG_ENTER("translog_write_variable_record_1group"); + + *lsn= horizon= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, + lsn, parts)) + { + DBUG_RETURN(1); + } + cursor= log_descriptor.bc; + cursor.chaser= 1; + + /* + Advance pointer To be able unlock the loghandler + */ + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - header_length); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + + if (record_rest + 1 == log_descriptor.page_capacity_chunk_2) + { + DBUG_PRINT("info", ("2 chunks type 3 is needed")); + /* We will write 2 chunks type 3 at the end of this group */ + additional_chunk3_page= 1; + record_rest= 1; + } + + DBUG_PRINT("info", ("first_page: %u (%u), full_pages: %u (%lu), " + "additional: %u (%u), rest %u = %u", + first_page, first_page - header_length, + full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + additional_chunk3_page, + additional_chunk3_page * + (log_descriptor.page_capacity_chunk_2 - 1), + record_rest, parts->record_length)); + /* record_rest + 3 is chunk type 3 overhead + record_rest */ + translog_advance_pointer(full_pages + additional_chunk3_page, + (record_rest ? record_rest + 3 : 0)); + log_descriptor.bc.buffer->last_lsn= *lsn; + + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + if (rc) + DBUG_RETURN(1); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + /* fill the pages */ + translog_write_parts_on_page(&horizon, &cursor, first_page, parts); + + + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, (ulong) horizon.rec_offset)); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + DBUG_RETURN(1); + + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, (ulong) horizon.rec_offset)); + } + + if (additional_chunk3_page) + { + if (translog_write_variable_record_chunk3_page(parts, + log_descriptor. + page_capacity_chunk_2 - 2, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_ASSERT(cursor.current_page_size == TRANSLOG_PAGE_SIZE); + } + + if (translog_write_variable_record_chunk3_page(parts, + record_rest, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, (ulong) horizon.rec_offset)); + + rc= translog_buffer_lock(cursor.buffer); + if (!rc) + { + /* + check if we wrote something on lst not full page and need to reconstruct + CRC and sector protection + if (buffer->offset.rec_offset + buffer->size - horizon->rec_offset > + */ + translog_buffer_decrease_writers(cursor.buffer); + } + rc|= translog_buffer_unlock(cursor.buffer); + DBUG_RETURN(rc); +} + + +/* + Write variable record in 1 chunk + + SYNOPSIS + translog_write_variable_record_1chunk() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool +translog_write_variable_record_1chunk(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + void *tcb) +{ + int rc; + uchar chunk0_header[1 + 2 + 5 + 2]; + DBUG_ENTER("translog_write_variable_record_1chunk"); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, + lsn, parts)) + { + DBUG_RETURN(1); + } + + rc= translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + log_descriptor.bc.buffer->last_lsn= *lsn; + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/* + Calculate and write LSN difference (compressed LSN) + + SYNOPSIS + translog_put_LSN_diff() + base_lsn LSN from which we calculate difference + lsn LSN for codding + dst pointer before which result should be written + + NOTE: + to store an LSN in a compact way we will use the following compression: + + if a log record has LSN1, and it contains the lSN2 as a back reference, + instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + that is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. + + RETURN + pointer on coded LSN + NULL - error +*/ + +static uchar *translog_put_LSN_diff(LSN *base_lsn, LSN *lsn, uchar *dst) +{ + DBUG_ENTER("translog_put_LSN_diff"); + DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), val: (0x%lx,0x%lx), dst 0x%lx", + (ulong) base_lsn->file_no, + (ulong) base_lsn->rec_offset, + (ulong) lsn->file_no, + (ulong) lsn->rec_offset, (ulong) dst)); + if (base_lsn->file_no == lsn->file_no) + { + uint32 diff; + DBUG_ASSERT(base_lsn->rec_offset > lsn->rec_offset); + diff= base_lsn->rec_offset - lsn->rec_offset; + if (diff <= 0x3FFF) + { + dst-= 2; + dst[0]= diff >> 8; + dst[1]= (diff & 0xFF); + } + else if (diff <= 0x3FFFFF) + { + dst-= 3; + dst[0]= 0x40 | (diff >> 16); + int2store(dst + 1, diff & 0xFFFF); + } + else if (diff <= 0x3FFFFFFF) + { + dst-= 4; + dst[0]= 0x80 | (diff >> 24); + int3store(dst + 1, diff & 0xFFFFFF); + } + else + { + dst-= 5; + dst[0]= 0xC0; + int4store(dst + 1, diff); + } + } + else + { + uint32 diff; + uint32 offset_diff; + ulonglong base_offset= base_lsn->rec_offset; + DBUG_ASSERT(base_lsn->file_no > lsn->file_no); + diff= base_lsn->file_no - lsn->file_no; + if (base_offset < lsn->rec_offset) + { + /* take 1 from file offset */ + diff--; + base_offset+= 0x100000000LL; + } + offset_diff= base_offset - lsn->rec_offset; + if (diff > 0x3f) + { + /*TODO: error - too long transaction - panic!!! */ + UNRECOVERABLE_ERROR(("Too big file diff: %lu", (ulong) diff)); + DBUG_RETURN(NULL); + } + dst-= 5; + *dst= (0xC0 | diff); + int4store(dst + 1, offset_diff); + } + DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst)); + DBUG_RETURN(dst); +} + + +/* + Get LSN from LSN-difference (compressed LSN) + + SYNOPSIS + translog_get_LSN_from_diff() + base_lsn LSN from which we calculate difference + src pointer to coded lsn + dst pointer to buffer where to write 7byte LSN + + NOTE: + to store an LSN in a compact way we use the following compression: + + If a log record has LSN1, and it contains the lSN2 as a back reference, + instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. + + RETURN + pointer to buffer after decoded LSN +*/ + +static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst) +{ + LSN lsn; + uint32 diff; + uint32 first_byte; + uint8 code; + DBUG_ENTER("translog_get_LSN_from_diff"); + DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), src: 0x%lx, dst 0x%lx", + (ulong) base_lsn->file_no, + (ulong) base_lsn->rec_offset, (ulong) src, (ulong) dst)); + first_byte= *((uint8*) src); + code= first_byte & 0xC0; + first_byte &= 0x3F; + switch (code) { + case 0x00: + lsn.file_no= base_lsn->file_no; + lsn.rec_offset= + base_lsn->rec_offset - ((first_byte << 8) + *((uint8*) (src + 1))); + src+= 2; + break; + case 0x40: + lsn.file_no= base_lsn->file_no; + diff= uint2korr(src + 1); + lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 16) + diff); + src+= 3; + break; + case 0x80: + lsn.file_no= base_lsn->file_no; + diff= uint3korr(src + 1); + lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 24) + diff); + src+= 4; + break; + case 0xC0: + { + ulonglong base_offset= base_lsn->rec_offset; + diff= uint4korr(src + 1); + if (diff > base_lsn->rec_offset) + { + /* take 1 from file offset */ + first_byte++; + base_offset+= 0x100000000LL; + } + lsn.file_no= base_lsn->file_no - first_byte; + lsn.rec_offset= base_offset - diff; + src+= 5; + break; + } + default: + DBUG_ASSERT(0); + DBUG_RETURN(NULL); + } + lsn7store(dst, &lsn); + DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst)); + DBUG_RETURN(src); +} + + +/* + Encode relative LSNs listed in the parameters + + SYNOPSIS + translog_relative_LSN_encode() + parts Parts list with encoded LSN(s) + base_lsn LSN which is base for encoding + lsns number of LSN(s) to encode + compressed_LSNs buffer which can be used for storing compressed LSN(s) + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, + LSN *base_lsn, + uint lsns, uchar *compressed_LSNs) +{ + struct st_translog_part part; + uint lsns_len= lsns * 7; + + DBUG_ENTER("translog_relative_LSN_encode"); + + get_dynamic(&parts->parts, (gptr) &part, parts->current); + /* collect all LSN(s) in one chunk if it (they) is (are) divided */ + if (part.len < lsns_len) + { + uint copied= part.len; + DBUG_PRINT("info", ("Using buffer 0x%lx", (ulong) compressed_LSNs)); + memmove(compressed_LSNs, part.buff, part.len); + do + { + get_dynamic(&parts->parts, (gptr) &part, parts->current + 1); + if ((part.len + copied) < lsns_len) + { + memmove(compressed_LSNs + copied, part.buff, part.len); + copied+= part.len; + delete_dynamic_element(&parts->parts, parts->current + 1); + } + else + { + uint len= lsns_len - copied; + memmove(compressed_LSNs + copied, part.buff, len); + copied= lsns_len; + part.buff+= len; + part.len-= len; + /* + We do not check result of set_dynamic, because we are sure that + it will not grow + */ + set_dynamic(&parts->parts, (gptr) &part, parts->current + 1); + } + } while (copied < lsns_len); + part.len= lsns_len; + part.buff= compressed_LSNs; + } + { + /* Compress */ + LSN ref; + uint economy; + uchar *ref_ptr= part.buff + lsns_len - 7; + uchar *dst_ptr= part.buff + lsns_len; + uint i; + for (i= 0; i < lsns; i++, ref_ptr-= 7) + { + lsn7korr(&ref, ref_ptr); + if ((dst_ptr= translog_put_LSN_diff(base_lsn, &ref, dst_ptr)) == NULL) + DBUG_RETURN(1); + } + economy= (dst_ptr - part.buff); + DBUG_PRINT("info", ("Economy %u", economy)); + part.len-= economy; + parts->record_length-= economy; + parts->total_record_length-= economy; + part.buff= dst_ptr; + } + /* + We do not check result of set_dynamic, because we are sure that + it will not grow + */ + set_dynamic(&parts->parts, (gptr) &part, parts->current); + DBUG_RETURN(0); +} + + +/* + Write multi-group variable-size record + + SYNOPSIS + translog_write_variable_record_mgroup() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Header length calculated for 1 group + buffer_rest Beginning from which we plan to write in full pages + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool +translog_write_variable_record_mgroup(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, + uint16 header_length, + translog_size_t buffer_rest, + void *tcb) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i, chunk2_page, full_pages; + uint curr_group= 0; + translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1; + translog_size_t done= 0; + struct st_translog_group_descriptor group; + DYNAMIC_ARRAY groups; + uint16 chunk3_size; + uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1; + uint16 last_page_capacity; + my_bool new_page_before_chunk0= 1, first_chunk0= 1; + uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1]; + uchar chunk2_header[1]= + { + TRANSLOG_CHUNK_NOHDR + }; + uint header_fixed_part= header_length + 2; + uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1); + + DBUG_ENTER("translog_write_variable_record_mgroup"); + + if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor), + 10, 10 CALLER_INFO)) + { + UNRECOVERABLE_ERROR(("init array failed")); + DBUG_RETURN(1); + } + + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - 1); + DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest)); + + if (record_rest < buffer_rest) + { + DBUG_PRINT("info", ("too many free space because changing header")); + buffer_rest-= log_descriptor.page_capacity_chunk_2; + DBUG_ASSERT(record_rest >= buffer_rest); + } + + do + { + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255) + { + /* suzeof(uint8) == 256 is max number of chunk in multi-chunks group */ + full_pages= 255; + buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2; + } + /* + group chunks = + full pages + first page (which actually can be full, too. + But here we assign number of chunks - 1 + */ + group.num= full_pages; + if (insert_dynamic(&groups, (gptr) &group)) + { + translog_unlock(); + delete_dynamic(&groups); + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + + DBUG_PRINT("info", ("chunk #%u first_page: %u (%u), full_pages: %u (%lu), " + "Left %lu", + groups.elements, + first_page, first_page - 1, + full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + parts->record_length - (first_page - 1 + buffer_rest) - + done)); + translog_advance_pointer(full_pages, 0); + + rc|= translog_unlock(); + + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + delete_dynamic(&groups); + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + DBUG_RETURN(1); + } + + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + "Left: %lu", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, + (ulong) horizon.rec_offset, + (ulong) (parts->record_length - (first_page - 1) - + done))); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + { + delete_dynamic(&groups); + DBUG_RETURN(1); + } + + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)" + "Left: %lu", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, + (ulong) horizon.rec_offset, + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + done+= (first_page - 1 + buffer_rest); + + /* TODO: made separate function for following */ + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + delete_dynamic(&groups); + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + DBUG_RETURN(1); + } + rc= translog_buffer_lock(cursor.buffer); + if (!rc) + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + if (rc) + { + delete_dynamic(&groups); + DBUG_RETURN(1); + } + + translog_lock(); + + first_page= translog_get_current_page_rest(); + buffer_rest= translog_get_current_group_size(); + } while (first_page + buffer_rest < (uint) (parts->record_length - done)); + + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + group.num= 0; /* 0 because it does not matter + */ + if (insert_dynamic(&groups, (gptr) &group)) + { + delete_dynamic(&groups); + translog_unlock(); + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + record_rest= parts->record_length - done; + DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest)); + if (first_page <= record_rest + 1) + { + chunk2_page= 1; + record_rest-= (first_page - 1); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + last_page_capacity= page_capacity; + } + else + { + chunk2_page= full_pages= 0; + last_page_capacity= first_page; + } + chunk3_size= 0; + chunk3_pages= 0; + if (last_page_capacity > record_rest + 1 && record_rest != 0) + { + if (last_page_capacity > + record_rest + header_fixed_part + groups.elements * (7 + 1)) + { + /* 1 record of type 0 */ + chunk3_pages= 0; + } + else + { + chunk3_pages= 1; + if (record_rest + 2 == last_page_capacity) + { + chunk3_size= record_rest - 1; + record_rest= 1; + } + else + { + chunk3_size= record_rest; + record_rest= 0; + } + } + } + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers + */ + while (page_capacity < + record_rest + header_fixed_part + + (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1)) + chunk0_pages++; + DBUG_PRINT("info", ("chunk0_pages %u, groups %u, groups per full page %u, " + "Group on last page %u", + chunk0_pages, groups.elements, + groups_per_page, + (groups.elements - + ((page_capacity - header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)))); + DBUG_PRINT("info", ("first_page: %u, chunk2 %u full_pages: %u (%lu), " + "chunk3 %u (%u), rest %u", + first_page, + chunk2_page, full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + chunk3_pages, (uint) chunk3_size, (uint) record_rest)); + translog_advance_pointer(full_pages + chunk3_pages + + (chunk0_pages - 1), + record_rest + header_fixed_part + + (groups.elements - + ((page_capacity - header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)) * (7 + 1)); + translog_unlock(); + + if (chunk2_page) + { + DBUG_PRINT("info", ("chunk 2 to finish first page")); + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + "Left: %lu", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, + (ulong) horizon.rec_offset, + (ulong) (parts->record_length - (first_page - 1) - + done))); + } + else if (chunk3_pages) + { + DBUG_PRINT("info", ("chunk 3")); + DBUG_ASSERT(full_pages == 0); + uchar chunk3_header[3]; + chunk3_header[0]= TRANSLOG_CHUNK_LNGTH; + int2store(chunk3_header + 1, chunk3_size); + translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header); + translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts); + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + "Left: %lu", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, + (ulong) horizon.rec_offset, + (ulong) (parts->record_length - chunk3_size - done))); + chunk3_pages= 0; + } + else + { + DBUG_PRINT("info", ("no new_page_before_chunk0")); + new_page_before_chunk0= 0; + } + + for (i= 0; i < full_pages; i++) + { + DBUG_ASSERT(chunk2_page != 0); + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + { + delete_dynamic(&groups); + DBUG_RETURN(1); + } + + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + "Left: %lu", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, + (ulong) horizon.rec_offset, + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + if (chunk3_pages && + translog_write_variable_record_chunk3_page(parts, + chunk3_size, + &horizon, &cursor)) + { + delete_dynamic(&groups); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset, + (uint) horizon.file_no, (ulong) horizon.rec_offset)); + + + *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + do + { + int limit; + if (new_page_before_chunk0) + { + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + delete_dynamic(&groups); + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + DBUG_RETURN(1); + } + } + new_page_before_chunk0= 1; + + if (first_chunk0) + { + *lsn= horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, + lsn, parts)) + { + DBUG_RETURN(1); + } + + first_chunk0= 0; + } + + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers => the fist page is full or number of groups less then + possible number of full page. + */ + limit= (groups_per_page < groups.elements - curr_group ? + groups_per_page : groups.elements - curr_group); + DBUG_PRINT("info", ("Groups: %u curr %u, limit %u", + (uint) groups.elements, (uint) curr_group, + (uint) limit)); + + if (chunk0_pages == 1) + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u", + (uint) limit, (uint) record_rest, + (uint) (2 + limit * (7 + 1) + record_rest))); + int2store(chunk0_header + header_length - 2, + 2 + limit * (7 + 1) + record_rest); + } + else + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u", + (uint) limit, (uint) (2 + limit * (7 + 1)))); + int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1)); + } + int2store(chunk0_header + header_length, groups.elements - curr_group); + translog_write_data_on_page(&horizon, &cursor, header_fixed_part, + chunk0_header); + for (i= curr_group; i < limit + curr_group; i++) + { + get_dynamic(&groups, (gptr) &group, i); + lsn7store(group_desc, &group.addr); + group_desc[7]= group.num; + translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc); + } + + if (chunk0_pages == 1 && record_rest != 0) + translog_write_parts_on_page(&horizon, &cursor, record_rest, parts); + + chunk0_pages--; + curr_group+= limit; + + } while (chunk0_pages != 0); + rc= translog_buffer_lock(cursor.buffer); + if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0) + cursor.buffer->last_lsn= *lsn; + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + + delete_dynamic(&groups); + DBUG_RETURN(rc); +} + + +/* + Write the variable length log record + + SYNOPSIS + translog_write_variable_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_write_variable_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + void *tcb) +{ + struct st_translog_buffer *buffer_to_flush= NULL; + uint header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + ulong buffer_rest; + uint page_rest; + uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per + record is 2 */ + + DBUG_ENTER("translog_write_variable_record"); + + translog_lock(); + DBUG_PRINT("info", ("horizon (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset)); + page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size; + DBUG_PRINT("info", ("header length %u, page_rest: %u", + header_length1, page_rest)); + + /* + header and part which we should read have to fit in one chunk + TODO: allow to divide readable header + */ + if (page_rest < + (header_length1 + log_record_type_descriptor[type].read_header_len)) + { + DBUG_PRINT("info", + ("Next page, size: %u, header: %u + %u", + log_descriptor.bc.current_page_size, + header_length1, + log_record_type_descriptor[type].read_header_len)); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + page_rest= log_descriptor.page_capacity_chunk_2 + 1; + DBUG_PRINT("info", ("page_rest: %u", page_rest)); + } + + /* + To minimize compressed size we will compress always relative to + very first chunk address (log_descriptor.horizon for now) + */ + if (log_record_type_descriptor[type].compresed_LSN > 0) + { + if (translog_relative_LSN_encode(parts, &log_descriptor.horizon, + log_record_type_descriptor[type]. + compresed_LSN, compressed_LSNs)) + { + int rc= translog_unlock(); + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + DBUG_RETURN(1); + } + /* recalculate header length after compression */ + header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + DBUG_PRINT("info", ("after compressing LSN(s) header length %u, " + "record length %lu", + header_length1, parts->record_length)); + } + + /* TODO: check space on current page for header + few bytes */ + if (page_rest >= parts->record_length + header_length1) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1chunk(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, tcb)); + } + + buffer_rest= translog_get_current_group_size(); + + if (buffer_rest >= parts->record_length + header_length1 - page_rest) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1group(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, tcb)); + } + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_mgroup(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, + buffer_rest, tcb)); + DBUG_RETURN(0); +} + + +/* + Write the fixed and pseudo-fixed log record + + SYNOPSIS + translog_write_fixed_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_write_fixed_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + void *tcb) +{ + struct st_translog_buffer *buffer_to_flush= NULL; + uchar chunk1_header[1 + 2]; + uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per + record is 2 */ + struct st_translog_part part; + int rc; + DBUG_ENTER("translog_write_fixed_record"); + DBUG_ASSERT((log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH && + parts->record_length == + log_record_type_descriptor[type].fixed_length) || + (log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH && + (parts->record_length - + log_record_type_descriptor[type].compresed_LSN * 2) <= + log_record_type_descriptor[type].fixed_length)); + + translog_lock(); + DBUG_PRINT("info", ("horizon (%u,0x%lx)", + (uint) log_descriptor.horizon.file_no, + (ulong) log_descriptor.horizon.rec_offset)); + + DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", + ("Page size: %u, record %u, next cond %d", + log_descriptor.bc.current_page_size, + (parts->record_length - + log_record_type_descriptor[type].compresed_LSN * 2 + 3), + ((((uint) log_descriptor.bc.current_page_size) + + (parts->record_length - + log_record_type_descriptor[type].compresed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE))); + /* + check that there is enough place on current page: + (log_record_type_descriptor[type].fixed_length - economized on compressed + LSNs) bytes + */ + if ((((uint) log_descriptor.bc.current_page_size) + + (parts->record_length - + log_record_type_descriptor[type].compresed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE) + { + DBUG_PRINT("info", ("Next page")); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + } + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, + lsn, parts)) + { + DBUG_RETURN(1); + } + + + /* compress LSNs */ + if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(log_record_type_descriptor[type].compresed_LSN > 0); + if (translog_relative_LSN_encode(parts, lsn, + log_record_type_descriptor[type]. + compresed_LSN, compressed_LSNs)) + { + rc= 1; + goto err; + } + } + + /* + Write the whole record at once (we sure that there is enough place on + the destination page + */ + DBUG_ASSERT(parts->current != 0); /* first part is left for + header */ + parts->total_record_length+= (part.len= 1 + 2); + part.buff= chunk1_header; + *chunk1_header= (uchar) (type |TRANSLOG_CHUNK_FIXED); + int2store(chunk1_header + 1, short_trid); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); + + rc= translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + + log_descriptor.bc.buffer->last_lsn= *lsn; +err: + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/* + Write the log record + + SYNOPSIS + translog_write_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + tcb Transaction control block pointer for hooks by + record log type + partN_length length of Ns part of the log + partN_buffer pointer on Ns part buffer + 0 sign of the end of parts + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_write_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + void *tcb, + translog_size_t part1_length, + uchar *part1_buff, ...) +{ + struct st_translog_parts parts; + va_list pvar; + int rc; + DBUG_ENTER("translog_write_record"); + DBUG_PRINT("enter", ("type %u, ShortTrID %u", (uint) type, (uint)short_trid)); + + /* move information about parts into dynamic array */ + if (init_dynamic_array(&parts.parts, sizeof(struct st_translog_part), + 10, 10 CALLER_INFO)) + { + UNRECOVERABLE_ERROR(("init array failed")); + DBUG_RETURN(1); + } + { + struct st_translog_part part; + + /* reserve place for header */ + parts.current= 1; + part.len= 0; + part.buff= 0; + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + + parts.record_length= part.len= part1_length; + part.buff= part1_buff; + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("record length: %lu, %lu ...", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* count record length */ + va_start(pvar, part1_buff); + for (;;) + { + part.len= va_arg(pvar, translog_size_t); + if (part.len == 0) + break; + parts.record_length+= part.len; + part.buff= va_arg(pvar, uchar*); + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("record length: %lu, %lu ...", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + } + va_end(pvar); + + /* + start total_record_length from record_length then overhead will + be add + */ + parts.total_record_length= parts.record_length; + } + va_end(pvar); + DBUG_PRINT("info", ("record length: %lu, %lu", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* process this parts */ + if (!(rc= (log_record_type_descriptor[type].prewrite_hook && + (*log_record_type_descriptor[type].prewrite_hook) (type, tcb, + &parts)))) + { + switch (log_record_type_descriptor[type].class) + { + case LOGRECTYPE_VARIABLE_LENGTH: + { + rc= translog_write_variable_record(lsn, type, short_trid, &parts, tcb); + break; + } + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + { + rc= translog_write_fixed_record(lsn, type, short_trid, &parts, tcb); + break; + } + case LOGRECTYPE_NOT_ALLOWED: + default: + DBUG_ASSERT(0); + rc= 1; + } + } + + delete_dynamic(&parts.parts); + DBUG_RETURN(rc); +} + + +/* + Decode compressed (relative) LSN(s) + + SYNOPSIS + translog_relative_lsn_decode() + base_lsn LSN for encoding + src Decode LSN(s) from here + dst Put decoded LSNs here + lsns number of LSN(s) + + RETURN + position in sources after decoded LSN(s) +*/ + +static uchar *translog_relative_LSN_decode(LSN *base_lsn, + uchar *src, uchar *dst, uint lsns) +{ + uint i; + for (i= 0; i < lsns; i++, dst+= 7) + { + src= translog_get_LSN_from_diff(base_lsn, src, dst); + } + return src; +} + +/* + Get header of fixed/pseudo length record and call hook for it processing + + SYNOPSIS + translog_fixed_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + + RETURN + 0 - error + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_fixed_length_header(uchar *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff) +{ + struct st_log_record_type_descriptor *desc= + log_record_type_descriptor + buff->type; + uchar *src= page + page_offset + 3; + uchar *dst= buff->header; + uchar *start= src; + uint lsns= desc->compresed_LSN; + uint length= desc->fixed_length + (lsns * 2); + + DBUG_ENTER("translog_fixed_length_header"); + + buff->record_length= length; + + if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(lsns > 0); + src= translog_relative_LSN_decode(&buff->lsn, src, dst, lsns); + lsns*= 7; + dst+= lsns; + length-= lsns; + buff->compressed_LSN_economy= (uint16) (lsns - (src - start)); + } + else + buff->compressed_LSN_economy= 0; + + memmove(dst, src, length); + buff->non_header_data_start_offset= page_offset + + ((src + length) - (page + page_offset)); + buff->non_header_data_len= 0; + DBUG_RETURN(buff->record_length); +} + + +/* + Free resources used by TRANSLOG_HEADER_BUFFER + + SYNOPSIS + translog_free_record_header(); +*/ + +void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff) +{ + DBUG_ENTER("translog_free_record_header"); + if (buff->groups_no != 0) + { + my_free((gptr) buff->groups, MYF(0)); + buff->groups_no= 0; + } + DBUG_VOID_RETURN; +} + + +/* + Set current horizon in the scanner data structure + + SYNOPSIS + translog_scanner_set_horizon() + scanner Information about current chunk during scanning +*/ + +static void translog_scanner_set_horizon(struct st_translog_scanner_data + *scanner) +{ + translog_lock(); + scanner->horizon= log_descriptor.horizon; + translog_unlock(); +} + + +/* + Set last page in the scanner data structure + + SYNOPSIS + translog_scanner_set_last_page() + scanner Information about current chunk during scanning + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_scanner_set_last_page(struct st_translog_scanner_data + *scanner) +{ + my_bool page_ok; + scanner->last_file_page= scanner->page_addr; + if (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)) + return (1); + return (0); +} + + +/* + Init scanner + + SYNOPSIS + translog_init_scanner() + lsn LSN with which it have to be inited + fixed_horizon true if it is OK do not read records which was written + after scanning beginning + scanner scanner which have to be inited + + RETURN + 0 - OK + 1 - Error +*/ +static my_bool translog_init_scanner(LSN *lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner) +{ + TRANSLOG_VALIDATOR_DATA data= + { + &scanner->page_addr, 0 + }; + + DBUG_ENTER("translog_init_scanner"); + DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)", + (ulong) lsn->file_no, (ulong) lsn->rec_offset)); + DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); + scanner->page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE; + + scanner->fixed_horizon= fixed_horizon; + + translog_scanner_set_horizon(scanner); + DBUG_PRINT("info", ("Horizon: (0x%lx,0x%lx)", + (ulong) scanner->horizon.file_no, + (ulong) scanner->horizon.rec_offset)); + + /* lsn < horizon */ + DBUG_ASSERT(lsn->file_no < scanner->horizon.file_no || + (lsn->file_no == scanner->horizon.file_no && + lsn->rec_offset < scanner->horizon.rec_offset)); + + scanner->page_addr= *lsn; + scanner->page_addr.rec_offset-= scanner->page_offset; + + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Checks End of the Log + + SYNOPSIS + translog_scanner_eol() + scanner Information about current chunk during scanning + + RETURN + 1 - End of the Log + 0 - OK +*/ +static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner) +{ + DBUG_ENTER("translog_scanner_eol"); + DBUG_PRINT("enter", + ("Horizon: (%lu, 0x%lx), Current: (%lu, 0x%lx+0x%x=0x%lx)", + (ulong) scanner->horizon.file_no, + (ulong) scanner->horizon.rec_offset, + (ulong) scanner->page_addr.file_no, + (ulong) scanner->page_addr.rec_offset, + (uint) scanner->page_offset, + (ulong) (scanner->page_addr.rec_offset + scanner->page_offset))); + if (scanner->horizon.file_no > scanner->page_addr.file_no || + (scanner->horizon.file_no == scanner->page_addr.file_no && + scanner->horizon.rec_offset > (scanner->page_addr.rec_offset + + scanner->page_offset))) + { + DBUG_PRINT("info", ("Horizon is not reached")); + DBUG_RETURN(0); + } + if (scanner->fixed_horizon) + { + DBUG_PRINT("info", ("Horizon is fixed and reached")); + DBUG_RETURN(1); + } + translog_scanner_set_horizon(scanner); + DBUG_PRINT("info", + ("Horizon is re-read, EOL: %d", + scanner->horizon.file_no <= scanner->page_addr.file_no && + (scanner->horizon.file_no != scanner->page_addr.file_no || + scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset + + scanner->page_offset)))); + DBUG_RETURN(scanner->horizon.file_no <= scanner->page_addr.file_no && + (scanner->horizon.file_no != scanner->page_addr.file_no || + scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset + + scanner->page_offset))); +} + + +/* + Cheks End of the Page + + SYNOPSIS + translog_scanner_eop() + scanner Information about current chunk during scanning + + RETURN + 1 - End of the Page + 0 - OK +*/ +static my_bool translog_scanner_eop(struct st_translog_scanner_data *scanner) +{ + DBUG_ENTER("translog_scanner_eop"); + DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE || + scanner->page[scanner->page_offset] == 0); +} + + +/* + Checks End of the File (I.e. we are scanning last page, which do not + mean end of this page) + + SYNOPSIS + translog_scanner_eof() + scanner Information about current chunk during scanning + + RETURN + 1 - End of the File + 0 - OK +*/ +static my_bool translog_scanner_eof(struct st_translog_scanner_data *scanner) +{ + DBUG_ENTER("translog_scanner_eof"); + DBUG_ASSERT(scanner->page_addr.file_no == scanner->last_file_page.file_no); + DBUG_PRINT("enter", ("curr Page 0x%lx, last page 0x%lx, " + "normal EOF %d", + scanner->page_addr.rec_offset, + scanner->last_file_page.rec_offset, + scanner->page_addr.rec_offset == + scanner->last_file_page.rec_offset)); + /* + TODO: detect damaged file EOF, + TODO: issue warning if damaged file EOF detected + */ + DBUG_RETURN(scanner->page_addr.rec_offset == + scanner->last_file_page.rec_offset); +} + + +/* + Move scanner to the next chunk + + SYNOPSIS + translog_get_next_chunk() + scanner Information about current chunk during scanning + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) +{ + DBUG_ENTER("translog_get_next_chunk"); + uint16 len= translog_get_total_chunk_length(scanner->page, + scanner->page_offset); + if (len == 0) + DBUG_RETURN(1); + scanner->page_offset+= len; + + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + if (translog_scanner_eop(scanner)) + { + if (translog_scanner_eof(scanner)) + { + DBUG_PRINT("info", ("horizon (%lu,0x%lx) pageaddr (%lu,0x%lx)", + (ulong) scanner->horizon.file_no, + (ulong) scanner->horizon.rec_offset, + (ulong) scanner->page_addr.file_no, + (ulong) scanner->page_addr.rec_offset)); + /* if it is log end it have to be caught before */ + DBUG_ASSERT(scanner->horizon.file_no > scanner->page_addr.file_no); + scanner->page_addr.file_no++; + scanner->page_addr.rec_offset= TRANSLOG_PAGE_SIZE; + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + } + else + { + scanner->page_addr.rec_offset+= TRANSLOG_PAGE_SIZE; + } + { + TRANSLOG_VALIDATOR_DATA data= + { + &scanner->page_addr, 0 + }; + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + } + scanner->page_offset= translog_get_first_chunk_offset(scanner->page); + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + DBUG_ASSERT(scanner->page[scanner->page_offset] != 0); + } + DBUG_RETURN(0); +} + + +/* + Get header of variable length record and call hook for it processing + + SYNOPSIS + translog_variable_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + scanner If present should be moved to the header page if + it differ from LSN page + + RETURN + 0 - error + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_variable_length_header(uchar *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff, + struct + st_translog_scanner_data + *scanner) +{ + struct st_log_record_type_descriptor *desc= + log_record_type_descriptor + buff->type; + uchar *src= page + page_offset + 1 + 2; + uchar *dst= buff->header; + LSN base_lsn; + uint lsns= desc->compresed_LSN; + uint16 chunk_len; + uint16 length= desc->read_header_len + (lsns * 2); + uint16 buffer_length= length; + uint16 body_len; + struct st_translog_scanner_data internal_scanner; + + DBUG_ENTER("translog_variable_length_header"); + + buff->record_length= translog_variable_record_1group_decode_len(&src); + chunk_len= uint2korr(src); + DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, length %u, bufflen %u", + (ulong) buff->record_length, (uint) chunk_len, + (uint) length, (uint) buffer_length)); + if (chunk_len == 0) + { + uint16 page_rest; + DBUG_PRINT("info", ("1 group")); + src+= 2; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + + base_lsn= buff->lsn; + body_len= (page_rest < buff->record_length ? + page_rest : buff->record_length); + } + else + { + uint grp_no, curr; + uint header_to_skip; + uint16 page_rest; + + DBUG_PRINT("info", ("multi-group")); + grp_no= buff->groups_no= uint2korr(src + 2); + if ((buff->groups= + (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * buff->groups_no, + MYF(0))) == 0) + DBUG_RETURN(0); + DBUG_PRINT("info", ("Groups: %u", (uint) grp_no)); + src+= (2 + 2); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + curr= 0; + header_to_skip= src - (page + page_offset); + buff->chunk0_pages= 0; + + for (;;) + { + uint i; + uint read= grp_no; + + buff->chunk0_pages++; + if (page_rest < grp_no * (7 + 1)) + read= page_rest / (7 + 1); + DBUG_PRINT("info", ("Read chunk0 page#%u read %u left %u start from %u", + buff->chunk0_pages, read, grp_no, curr)); + for (i= 0; i < read; i++, curr++) + { + DBUG_ASSERT(curr < buff->groups_no); + lsn7korr(&buff->groups[curr].addr, src + i * (7 + 1)); + buff->groups[curr].num= src[i * (7 + 1) + 7]; + DBUG_PRINT("info", ("group #%u (%u,0x%lx) chunks %u", + curr, + (uint) buff->groups[curr].addr.file_no, + (ulong) buff->groups[curr].addr.rec_offset, + (uint) buff->groups[curr].num)); + } + grp_no-= read; + if (grp_no == 0) + { + if (scanner) + { + buff->chunk0_data_addr= scanner->page_addr; + buff->chunk0_data_addr.rec_offset+= (page_offset + header_to_skip + + i * (7 + 1)); + } + else + { + buff->chunk0_data_addr= buff->lsn; + buff->chunk0_data_addr.rec_offset+= (header_to_skip + i * (7 + 1)); + } + buff->chunk0_data_len= chunk_len - 2 - i * (7 + 1); + DBUG_PRINT("info", ("Data address (%u,0x%lx), len: %u", + (uint) buff->chunk0_data_addr.file_no, + (ulong) buff->chunk0_data_addr.rec_offset, + buff->chunk0_data_len)); + break; + } + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner for header reding")); + scanner= &internal_scanner; + translog_init_scanner(&buff->lsn, 1, scanner); + } + translog_get_next_chunk(scanner); + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + header_to_skip; + chunk_len= uint2korr(src - 2 - 2); + DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len)); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + } + + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner")); + scanner= &internal_scanner; + } + + base_lsn= buff->groups[0].addr; + translog_init_scanner(&base_lsn, 1, scanner); + /* first group chunk is always chunk type 2 */ + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + 1; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + body_len= page_rest; + } + if (lsns) + { + uchar *start= src; + src= translog_relative_LSN_decode(&base_lsn, src, dst, lsns); + lsns*= 7; + dst+= lsns; + length-= lsns; + buff->record_length+= (buff->compressed_LSN_economy= + (uint16) (lsns - (src - start))); + DBUG_PRINT("info", ("lsns: %u, length %u, economy %u, new length %lu", + lsns / 7, (uint) length, + (uint) buff->compressed_LSN_economy, + (ulong) buff->record_length)); + body_len-= (src - start); + } + else + buff->compressed_LSN_economy= 0; + + DBUG_ASSERT(body_len >= length); + body_len-= length; + memmove(dst, src, length); + buff->non_header_data_start_offset= src + length - page; + buff->non_header_data_len= body_len; + DBUG_PRINT("info", ("non_header_data_start_offset %u len %u buffer %u", + buff->non_header_data_start_offset, + buff->non_header_data_len, buffer_length)); + DBUG_RETURN(buffer_length); +} + + +/* + Read record header from the given buffer + + SYNOPSIS + translog_read_record_header_from_buffer() + page page content buffer + page_offset offset of the chunk in the page + buff destination buffer + scanner if it is need this scanner will be moved to the + record header page (differ from LSN page in case of + multi-group records +*/ + +translog_size_t +translog_read_record_header_from_buffer(uchar *page, + uint16 page_offset, + TRANSLOG_HEADER_BUFFER *buff, + struct + st_translog_scanner_data *scanner) +{ + DBUG_ENTER("translog_read_record_header_from_buffer"); + DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_LSN || + (page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_FIXED); + buff->type= (page[page_offset] & TRANSLOG_REC_TYPE); + buff->short_trid= uint2korr(page + page_offset + 1); + DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%u,0x%lx)", + (uint) buff->type, (uint)buff->short_trid, + buff->lsn.file_no, buff->lsn.rec_offset)); + /* Read required bytes from the header and call hook */ + switch (log_record_type_descriptor[buff->type].class) + { + case LOGRECTYPE_VARIABLE_LENGTH: + DBUG_RETURN(translog_variable_length_header(page, page_offset, buff, + scanner)); + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + DBUG_RETURN(translog_fixed_length_header(page, page_offset, buff)); + default: + DBUG_ASSERT(0); + } + DBUG_RETURN(0); +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + buff log record header buffer + + NOTE + - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed + correctly. + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 - error + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_read_record_header(LSN *lsn, + TRANSLOG_HEADER_BUFFER *buff) +{ + uchar buffer[TRANSLOG_PAGE_SIZE], *page; + translog_size_t page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE; + + DBUG_ENTER("translog_read_record_header"); + DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)", + (ulong) lsn->file_no, (ulong) lsn->rec_offset)); + DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); + + buff->lsn= *lsn; + buff->groups_no= 0; + { + TRANSLOG_ADDRESS addr= *lsn; + TRANSLOG_VALIDATOR_DATA data= + { + &addr, 0 + }; + addr.rec_offset-= page_offset; + if ((page= translog_get_page(&data, buffer)) == NULL) + DBUG_RETURN(0); + } + + DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset, + buff, 0)); +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header_scan() + scan scanner position to read + buff log record header buffer + move_scanner request to move scanner to the header position + + NOTE + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 - error + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t +translog_read_record_header_scan(struct st_translog_scanner_data + *scanner, + TRANSLOG_HEADER_BUFFER *buff, + my_bool move_scanner) +{ + DBUG_ENTER("translog_read_record_header_scan"); + DBUG_PRINT("enter", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " + "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", + (uint) scanner->page_addr.file_no, + (ulong) scanner->page_addr.rec_offset, + (uint) scanner->horizon.file_no, + (ulong) scanner->horizon.rec_offset, + (uint) scanner->last_file_page.file_no, + (ulong) scanner->last_file_page.rec_offset, + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + buff->groups_no= 0; + buff->lsn= scanner->page_addr; + buff->lsn.rec_offset+= scanner->page_offset; + DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page, + scanner->page_offset, + buff, + (move_scanner ? + scanner : 0))); +} + + +/* + Read record header and some fixed part of the next record (the part + depend on record type). + + SYNOPSIS + translog_read_next_record_header() + lsn log record serial number (address of the record) + previous to the record which will be read + If LSN present scanner will be initialized from it, + do not use LSN after initialization for fast scanning. + buff log record header buffer + fixed_horizon true if it is OK do not read records which was written + after scanning beginning + scanner data for scanning if lsn is NULL scanner data + will be used for continue scanning. + The scanner can be NULL. + + NOTE + - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed + correctly (lsn in buffer will be replaced by next record, but initial + lsn will be read correctly). + - it is like translog_read_record_header, but read next record, so see + its NOTES. + - in case of end of the log buff->lsn will be set to + (CONTROL_FILE_IMPOSSIBLE_FILENO, 0) + RETURN + 0 - error + TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ +translog_size_t translog_read_next_record_header(LSN *lsn, + TRANSLOG_HEADER_BUFFER *buff, + my_bool fixed_horizon, + struct + st_translog_scanner_data + *scanner) +{ + struct st_translog_scanner_data internal_scanner; + uint8 chunk_type; + + buff->groups_no= 0; /* to be sure that we will free + it right */ + + DBUG_ENTER("translog_read_next_record_header"); + DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner)); + if (scanner == NULL) + { + DBUG_ASSERT(lsn != NULL); + scanner= &internal_scanner; + } + if (lsn) + { + if (translog_init_scanner(lsn, fixed_horizon, scanner)) + DBUG_RETURN(0); + DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); + } + DBUG_PRINT("info", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " + "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", + (uint) scanner->page_addr.file_no, + (ulong) scanner->page_addr.rec_offset, + (uint) scanner->horizon.file_no, + (ulong) scanner->horizon.rec_offset, + (uint) scanner->last_file_page.file_no, + (ulong) scanner->last_file_page.rec_offset, + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + + do + { + if (translog_get_next_chunk(scanner)) + DBUG_RETURN(0); + chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE; + DBUG_PRINT("info", ("type %x, byte %x", (uint) chunk_type, + (uint) scanner->page[scanner->page_offset])); + } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type != + TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0); + + if (scanner->page[scanner->page_offset] == 0) + { + /* Last record was read */ + buff->lsn.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; + buff->lsn.rec_offset= 0; + DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); /* just it is not error + */ + } + DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0)); +} + + +/* + Moves record data reader to the next chunk and fill the data reader + information about that chunk. + + SYNOPSIS + translog_record_read_next_chunk() + data data cursor + + RETURN + 0 - OK + 1 - Error +*/ +static my_bool translog_record_read_next_chunk(struct st_translog_reader_data + *data) +{ + translog_size_t new_current_offset= data->current_offset + data->chunk_size; + uint16 chunk_header_len, chunk_len; + uint8 type; + + DBUG_ENTER("translog_record_read_next_chunk"); + + if (data->eor) + { + DBUG_PRINT("info", ("end of the record flag set")); + DBUG_RETURN(1); + } + + if (data->header.groups_no && + data->header.groups_no - 1 != data->current_group && + data->header.groups[data->current_group].num == data->current_chunk) + { + /* Goto next group */ + data->current_group++; + data->current_chunk= 0; + DBUG_PRINT("info", ("skip to group #%u", data->current_group)); + translog_init_scanner(&data->header.groups[data->current_group].addr, + 1, &data->scanner); + } + else + { + data->current_chunk++; + if (translog_get_next_chunk(&data->scanner)) + DBUG_RETURN(1); + } + type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE; + + if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no) + { + DBUG_PRINT("info", + ("Last chunk: data len %u, offset %u group %u of %u", + data->header.chunk0_data_len, data->scanner.page_offset, + data->current_group, data->header.groups_no - 1)); + DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); + DBUG_ASSERT(data->header.lsn.file_no == data->scanner.page_addr.file_no && + data->header.lsn.rec_offset == + data->scanner.page_addr.rec_offset + data->scanner.page_offset); + translog_init_scanner(&data->header.chunk0_data_addr, 1, &data->scanner); + data->chunk_size= data->header.chunk0_data_len; + data->body_offset= data->scanner.page_offset; + data->current_offset= new_current_offset; + data->eor= 1; + DBUG_RETURN(0); + } + + if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED) + { + data->eor= 1; + DBUG_RETURN(1); /* End of record */ + } + + chunk_header_len= + translog_get_chunk_header_length(data->scanner.page, + data->scanner.page_offset); + chunk_len= translog_get_total_chunk_length(data->scanner.page, + data->scanner.page_offset); + data->chunk_size= chunk_len - chunk_header_len; + data->body_offset= data->scanner.page_offset + chunk_header_len; + data->current_offset= new_current_offset; + DBUG_PRINT("info", ("grp: %u chunk %u body_offset %u, chunk_size %u, " + "current_offset %lu", + (uint) data->current_group, + (uint) data->current_chunk, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Initialize record reader data from LSN + + SYNOPSIS + translog_init_reader_data() + lsn reference to LSN we should start from + data reader data to initialize + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool translog_init_reader_data(LSN *lsn, + struct st_translog_reader_data *data) +{ + DBUG_ENTER("translog_init_reader_data"); + if (translog_init_scanner(lsn, 1, &data->scanner) || + (data->read_header= + translog_read_record_header_scan(&data->scanner, &data->header, 1)) == 0) + { + DBUG_RETURN(1); + } + data->body_offset= data->header.non_header_data_start_offset; + data->chunk_size= data->header.non_header_data_len; + data->current_offset= data->read_header; + data->current_group= 0; + data->current_chunk= 0; + data->eor= 0; + DBUG_PRINT("info", ("read_header %u, " + "body_offset %u, chunk_size %u, current_offset %lu", + (uint) data->read_header, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Read a part of the record. + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + offset from the beginning of the record beginning (read + by translog_read_record_header). + length length of record part which have to be read. + buffer buffer where to read the record part (have to be at + least 'length' bytes length) + + RETURN + length of data actually read +*/ + +translog_size_t translog_read_record(LSN *lsn, + translog_size_t offset, + translog_size_t length, + uchar *buffer, + struct st_translog_reader_data *data) +{ + translog_size_t requested_length= length; + translog_size_t end= offset + length; + struct st_translog_reader_data internal_data; + + DBUG_ENTER("translog_read_record"); + + if (data == NULL) + { + DBUG_ASSERT(lsn != NULL); + data= &internal_data; + } + if (lsn || + (offset < data->current_offset && + !(offset < data->read_header && offset + length < data->read_header))) + { + if (translog_init_reader_data(lsn, data)) + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Offset %lu, length %lu " + "Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " + "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", + (ulong) offset, (ulong) length, + (uint) data->scanner.page_addr.file_no, + (ulong) data->scanner.page_addr.rec_offset, + (uint) data->scanner.horizon.file_no, + (ulong) data->scanner.horizon.rec_offset, + (uint) data->scanner.last_file_page.file_no, + (ulong) data->scanner.last_file_page.rec_offset, + (uint) data->scanner.page_offset, + (uint) data->scanner.page_offset, + data->scanner.fixed_horizon)); + if (offset < data->read_header) + { + DBUG_PRINT("info", + ("enter header offset %lu, length %lu", + (ulong) offset, (ulong) length)); + uint16 len= (data->read_header < end ? data->read_header : end) - offset; + memmove(buffer, data->header.header + offset, len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u, offset %lu, curr %lu, length %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + /* TODO: find first page which we should read by offset */ + + /* read the record chunk by chunk */ + do + { + uint page_end= data->current_offset + data->chunk_size; + DBUG_PRINT("info", + ("enter body offset %lu, curr %lu, length %lu page_end %lu", + (ulong) offset, (ulong) data->current_offset, (ulong) length, + (ulong) page_end)); + if (offset < page_end) + { + DBUG_ASSERT(offset >= data->current_offset); + uint len= page_end - offset; + memmove(buffer, + data->scanner.page + data->body_offset + + (offset - data->current_offset), len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u, offset %lu, curr %lu, length %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + if (translog_record_read_next_chunk(data)) + DBUG_RETURN(requested_length - length); + } while (length != 0); + + DBUG_RETURN(requested_length); +} + + +/* + Force skipping to the next buffer + + SYNOPSIS + translog_force_current_buffer_to_finish() +*/ + +static void translog_force_current_buffer_to_finish() +{ + TRANSLOG_ADDRESS new_buff_begunning; + uint8 old_buffer_no= log_descriptor.bc.buffer_no; + uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; + struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer; + uchar *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_size; + uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size; + uint16 current_page_size; + + new_buff_begunning= log_descriptor.bc.buffer->offset; + new_buff_begunning.rec_offset+= log_descriptor.bc.buffer->size; + + DBUG_ENTER("translog_force_current_buffer_to_finish"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx, " + "Buffer addr (%lu,0x%lx), " + "Page addr: (%lu,0x%lx), " + "New Buff: (%lu,0x%lx), " + "size %lu (%lu), Pg: %u, left: %u", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) log_descriptor.bc.buffer->offset.file_no, + (ulong) log_descriptor.bc.buffer->offset.rec_offset, + (ulong) log_descriptor.horizon.file_no, + (ulong) (log_descriptor.horizon.rec_offset - + log_descriptor.bc.current_page_size), + (ulong) new_buff_begunning.file_no, + (ulong) new_buff_begunning.rec_offset, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer), + (uint) log_descriptor.bc.current_page_size, + (uint) left)); + DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); + DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) + %TRANSLOG_PAGE_SIZE == + log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(log_descriptor.horizon.file_no == + log_descriptor.bc.buffer->offset.file_no); + DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset + + (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == + log_descriptor.horizon.rec_offset); + if (left != TRANSLOG_PAGE_SIZE && left != 0) + { + /* + TODO: if 'left' is so small that can't hold any other record + then do not move the page + */ + DBUG_PRINT("info", ("left %u", (uint) left)); + + new_buff_begunning.rec_offset-= log_descriptor.bc.current_page_size; + current_page_size= log_descriptor.bc.current_page_size; + + bzero(log_descriptor.bc.ptr, left); + log_descriptor.bc.buffer->size+= left; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, " + "Size: %lu", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + } + else + { + left= 0; + log_descriptor.bc.current_page_size= 0; + } + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + { + uint16 write_counter= log_descriptor.bc.write_counter; + uint16 previous_offset= log_descriptor.bc.previous_offset; + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + log_descriptor.bc.buffer->offset= new_buff_begunning; + log_descriptor.bc.write_counter= write_counter; + log_descriptor.bc.previous_offset= previous_offset; + } + + if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(data, &log_descriptor.bc); + if (left) + { + log_descriptor.bc.write_counter++; + log_descriptor.bc.previous_offset= current_page_size; + } + else + { + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + } + } + + if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_adler_crc(data + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); + int4store(data + 3 + 3 + 1, crc); + } + + if (left) + { + memmove(new_buffer->buffer, data, current_page_size); + log_descriptor.bc.ptr +=current_page_size; + log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_size= + current_page_size; + new_buffer->overlay= old_buffer; + } + else + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + + DBUG_VOID_RETURN; +} + +/* + Flush the log up to given LSN (included) + + SYNOPSIS + translog_flush() + lsn log record serial number up to which (inclusive) + the log have to be flushed + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_flush(LSN *lsn) +{ + LSN old_flushed, sent_to_file; + int rc= 0; + uint i; + my_bool full_circle= 0; + + DBUG_ENTER("translog_flush"); + DBUG_PRINT("enter", ("Flush up to LSN (%u,0x%lx)", + (uint) lsn->file_no, (ulong) lsn->rec_offset)); + + translog_lock(); + old_flushed= log_descriptor.flushed; + for (;;) + { + uint8 buffer_no= log_descriptor.bc.buffer_no; + uint8 buffer_start= buffer_no; + struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer; + + struct st_translog_buffer *buffer= log_descriptor.bc.buffer; + /* we can't flush in future */ + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *lsn) >= 0); + if (cmp_translog_addr(log_descriptor.flushed, *lsn) >= 0) + { + DBUG_PRINT("info", ("already flushed (%u,0x%lx)", + (uint) log_descriptor.flushed.file_no, + (ulong) log_descriptor.flushed.rec_offset)); + translog_unlock(); + DBUG_RETURN(0); + } + /* send to the file if it is not sent */ + translog_get_sent_to_file(&sent_to_file); + if (cmp_translog_addr(sent_to_file, *lsn) >= 0) + break; + + do + { + buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO; + buffer= log_descriptor.buffers + buffer_no; + translog_buffer_lock(buffer); + translog_buffer_unlock(buffer_unlock); + buffer_unlock= buffer; + if (buffer->file) + { + buffer_unlock= NULL; + if (buffer_start == buffer_no) + { + /* we made a circle */ + full_circle= 1; + translog_force_current_buffer_to_finish(); + } + break; + } + } while ((buffer_start != buffer_no) && + cmp_translog_addr(log_descriptor.flushed, *lsn) < 0); + if (buffer_unlock != NULL) + translog_buffer_unlock(buffer_unlock); + if (translog_buffer_flush(buffer)) + { + translog_buffer_unlock(buffer); + DBUG_RETURN(1); + } + translog_buffer_unlock(buffer); + if (!full_circle) + translog_lock(); + } + + for (i= old_flushed.file_no; i <= lsn->file_no; i++) + { + uint cache_index; + File file; + + if ((cache_index= log_descriptor.horizon.file_no - i) < OPENED_FILES_NUM) + { + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == 0) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(i)) == 0) + { + translog_unlock(); + DBUG_RETURN(1); + } + } + file= log_descriptor.log_file_num[cache_index]; + rc|= my_sync(file, MYF(MY_WME)); + } + else + { + /* very unlike situation with extremely small file size */ + File file= open_logfile_by_number_no_cache(i); + rc|= my_sync(file, MYF(MY_WME)); + my_close(file, MYF(MY_WME)); + } + } + log_descriptor.flushed= sent_to_file; + rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME)); + translog_unlock(); + DBUG_RETURN(rc); +} diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h new file mode 100644 index 00000000000..f4d939786fc --- /dev/null +++ b/storage/maria/ma_loghandler.h @@ -0,0 +1,314 @@ + +#ifndef _ma_loghandler_h +#define _ma_loghandler_h + +/* Transaction log flags */ +#define TRANSLOG_PAGE_CRC 1 +#define TRANSLOG_SECTOR_PROTECTION (1<<1) +#define TRANSLOG_RECORD_CRC (1<<2) + +/* page size in transaction log */ +#define TRANSLOG_PAGE_SIZE (8*1024) + +#include "ma_loghandler_lsn.h" + +/* short transaction ID type */ +typedef uint16 SHORT_TRANSACTION_ID; + +/* types of records in the transaction log */ +enum translog_record_type +{ + LOGREC_RESERVED_FOR_CHUNKS23= 0, + LOGREC_REDO_INSERT_ROW_HEAD= 1, + LOGREC_REDO_INSERT_ROW_TAIL= 2, + LOGREC_REDO_INSERT_ROW_BLOB= 3, + LOGREC_REDO_INSERT_ROW_BLOBS= 4, + LOGREC_REDO_PURGE_ROW= 5, + eLOGREC_REDO_PURGE_BLOCKS= 6, + LOGREC_REDO_DELETE_ROW= 7, + LOGREC_REDO_UPDATE_ROW_HEAD= 8, + LOGREC_REDO_INDEX= 9, + LOGREC_REDO_UNDELETE_ROW= 10, + LOGREC_CLR_END= 11, + LOGREC_PURGE_END= 12, + LOGREC_UNDO_ROW_INSERT= 13, + LOGREC_UNDO_ROW_DELETE= 14, + LOGREC_UNDO_ROW_UPDATE= 15, + LOGREC_UNDO_KEY_INSERT= 16, + LOGREC_UNDO_KEY_DELETE= 17, + LOGREC_PREPARE= 18, + LOGREC_PREPARE_WITH_UNDO_PURGE= 19, + LOGREC_COMMIT= 20, + LOGREC_COMMIT_WITH_UNDO_PURGE= 21, + LOGREC_CHECKPOINT_PAGE= 22, + LOGREC_CHECKPOINT_TRAN= 23, + LOGREC_CHECKPOINT_TABL= 24, + LOGREC_REDO_CREATE_TABLE= 25, + LOGREC_REDO_RENAME_TABLE= 26, + LOGREC_REDO_DROP_TABLE= 27, + LOGREC_REDO_TRUNCATE_TABLE= 28, + LOGREC_FILE_ID= 29, + LOGREC_LONG_TRANSACTION_ID= 30, + LOGREC_RESERVED_FUTURE_EXTENSION= 63 +}; +#define LOGREC_NUMBER_OF_TYPES 64 + +typedef uint32 translog_size_t; + +#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024 + +typedef struct st_translog_group_descriptor +{ + TRANSLOG_ADDRESS addr; + uint8 num; +} TRANSLOG_GROUP; + + +typedef struct st_translog_header_buffer +{ + /* LSN of the read record */ + LSN lsn; + /* type of the read record */ + enum translog_record_type type; + /* short transaction ID or 0 if it has no sense for the record */ + SHORT_TRANSACTION_ID short_trid; + /* + The Record length in buffer (including read header, but excluding + hidden part of record (type, short TrID, length) + */ + translog_size_t record_length; + /* + Real compressed LSN(s) size economy (*7 - ) + */ + uint16 compressed_LSN_economy; + /* + Buffer for write decoded header of the record (depend on the record + type) + */ + uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE]; + /* non read body data offset on the page */ + uint16 non_header_data_start_offset; + /* non read body data length in this first chunk */ + uint16 non_header_data_len; + /* number of groups listed in */ + uint groups_no; + /* array of groups descriptors, can be used only if groups_no > 0 */ + TRANSLOG_GROUP *groups; + /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */ + uint chunk0_pages; + /* chunk 0 data address (valid only if groups_no > 0) */ + TRANSLOG_ADDRESS chunk0_data_addr; + /* chunk 0 data size (valid only if groups_no > 0) */ + uint16 chunk0_data_len; +} TRANSLOG_HEADER_BUFFER; + + +struct st_translog_scanner_data +{ + uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */ + TRANSLOG_ADDRESS page_addr; /* current page address */ + TRANSLOG_ADDRESS horizon; /* end of the log which we saw + last time */ + TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ + uchar *page; /* page content pointer */ + translog_size_t page_offset; /* offset of the chunk in the + page */ + my_bool fixed_horizon; /* set horizon only once at + init */ +}; + + +struct st_translog_reader_data +{ + TRANSLOG_HEADER_BUFFER header; /* Header */ + struct st_translog_scanner_data scanner; /* chunks scanner */ + translog_size_t body_offset; /* current chunk body offset */ + translog_size_t current_offset; /* data offset from the record + beginning */ + uint16 read_header; /* number of bytes read in + header */ + uint16 chunk_size; /* current chunk size */ + uint current_group; /* current group */ + uint current_chunk; /* current chunk in the group */ + my_bool eor; /* end of the record */ +}; + + +/* + Initialize transaction log + + SYNOPSIS + translog_init() + directory Directory where log files are put + log_file_max_size max size of one log size (for new logs creation) + server_version version of MySQL servger (MYSQL_VERSION_ID) + server_id server ID (replication & Co) + pagecache Page cache for the log reads + flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION + TRANSLOG_RECORD_CRC) + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_init(const char *directory, uint32 log_file_max_size, + uint32 server_version, + uint32 server_id, PAGECACHE *pagecache, uint flags); + + +/* + Write the log record + + SYNOPSIS + translog_write_record() + lsn LSN of the record will be writen here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + tcb Transaction control block pointer for hooks by + record log type + partN_length length of Ns part of the log + partN_buffer pointer on Ns part buffer + 0 sign of the end of parts + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_write_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + void *tcb, + translog_size_t part1_length, + uchar *part1_buff, ...); + + +/* + Free log handler resources + + SYNOPSIS + translog_destroy() +*/ + +void translog_destroy(); + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + buff log record header buffer + + NOTE + - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed + correctly. + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 - error + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_read_record_header(LSN *lsn, + TRANSLOG_HEADER_BUFFER *buff); + + +/* + Free resources used by TRANSLOG_HEADER_BUFFER + + SYNOPSIS + translog_free_record_header(); +*/ + +void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); + + +/* + Read a part of the record. + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + offset from the beginning of the record beginning (read + by translog_read_record_header). + length length of record part which have to be read. + buffer buffer where to read the record part (have to be at + least 'length' bytes length) + + RETURN + 0 - error (or read out of the record) + length of data actually read +*/ + +translog_size_t translog_read_record(LSN *lsn, + translog_size_t offset, + translog_size_t length, + uchar *buffer, + struct st_translog_reader_data *data); + + +/* + Flush the log up to given LSN (included) + + SYNOPSIS + translog_flush() + lsn log record serial number up to which (inclusive) + the log have to be flushed + + RETURN + 0 - OK + 1 - Error +*/ + +my_bool translog_flush(LSN *lsn); + + +/* + Read record header and some fixed part of the next record (the part + depend on record type). + + SYNOPSIS + translog_read_next_record_header() + lsn log record serial number (address of the record) + previous to the record which will be read + If LSN present scanner will be initialized from it, + do not use LSN after initialization for fast scanning. + buff log record header buffer + fixed_horizon true if it is OK do not read records which was written + after scaning begining + scanner data for scaning if lsn is NULL scanner data + will be used for continue scaning. + scanner can be NULL. + + NOTE + - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed + correctly (lsn in buffer will be replaced by next record, but initial + lsn will be read correctly). + - it is like translog_read_record_header, but read next record, so see + its NOTES. + - in case of end of the log buff->lsn will be set to + (CONTROL_FILE_IMPOSSIBLE_LOGNO, 0) + RETURN + 0 - error + TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log + number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_read_next_record_header(LSN *lsn, + TRANSLOG_HEADER_BUFFER *buff, + my_bool fixed_horizon, + struct + st_translog_scanner_data + *scanner); + +#endif diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h new file mode 100644 index 00000000000..9576d4d734d --- /dev/null +++ b/storage/maria/ma_loghandler_lsn.h @@ -0,0 +1,39 @@ +#ifndef _ma_loghandler_lsn_h +#define _ma_loghandler_lsn_h + +/* Transaction log record address (file_no is int24 on the disk) */ +typedef struct st_translog_address +{ + uint32 file_no; + uint32 rec_offset; +} TRANSLOG_ADDRESS; + +/* + Compare addresses + A1 > A2 -> result > 0 + A1 == A2 -> 0 + A1 < A2 -> result < 0 +*/ +#define cmp_translog_addr(A1,A2) \ + ((A1).file_no == (A2).file_no ? \ + ((int64)(A1).rec_offset) - (int64)(A2).rec_offset : \ + ((int64)(A1).file_no - (int64)(A2).file_no)) + +/* LSN type (address of certain log record chank */ +typedef TRANSLOG_ADDRESS LSN; + +/* Puts LSN into buffer (dst) */ +#define lsn7store(dst, lsn) \ + do { \ + int3store((dst), (lsn)->file_no); \ + int4store((dst) + 3, (lsn)->rec_offset); \ + } while (0) + +/* Unpacks LSN from the buffer (P) */ +#define lsn7korr(lsn, P) \ + do { \ + (lsn)->file_no= uint3korr(P); \ + (lsn)->rec_offset= uint4korr((P) + 3); \ + } while (0) + +#endif diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 506bdbc71ca..380c42c105e 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -26,6 +26,10 @@ #include #endif +#include +#include "ma_loghandler.h" +#include "ma_control_file.h" + /* undef map from my_nosys; We need test-if-disk full */ #undef my_write @@ -438,6 +442,7 @@ extern LIST *maria_open_list; extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; extern uint maria_quick_table_bits; +extern const char *maria_data_root; extern my_bool maria_inited; /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index d0b247d65e1..78b285edd70 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -14,8 +14,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include -AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap +AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap +INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap # Only reason to link with libmyisam.a here is that it's where some fulltext # pieces are (but soon we'll remove fulltext dependencies from Maria). @@ -24,6 +26,54 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/storage/myisam/libmyisam.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ -noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t lockman2-t -CLEANFILES = maria_control + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ \ + $(top_builddir)/storage/maria/ma_loghandler.o +noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t \ + lockman2-t \ + mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ + mf_pagecache_single_64k-t \ + mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \ + mf_pagecache_consist_1kHC-t \ + mf_pagecache_consist_64kHC-t \ + mf_pagecache_consist_1kRD-t \ + mf_pagecache_consist_64kRD-t \ + mf_pagecache_consist_1kWR-t \ + mf_pagecache_consist_64kWR-t \ + ma_test_loghandler-t \ + ma_test_loghandler_multigroup-t \ + ma_test_loghandler_multithread-t \ + ma_test_loghandler_pagecache-t + +mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN + +mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 +mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY + +mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS + +mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS + +CLEANFILES = maria_control page_cache_test_file_1 \ + maria_log.???????? maria_control diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index beb86843dd3..7b7e1454cc3 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -33,7 +33,7 @@ #endif #include "maria.h" -#include "../../../storage/maria/ma_control_file.h" +#include "../../../storage/maria/maria_def.h" #include char file_name[FN_REFLEN]; diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c new file mode 100644 index 00000000000..1cbfcac504e --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -0,0 +1,540 @@ +#include "../maria_def.h" +#include +#include + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE (100 * 1024) + + +#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*1024L +#define ITERATIONS 181000 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(uchar *ptr, ulong length) +{ + ulong i; + uchar buff[2]; + for (i= 0; i < length; i++) + { + if (i % 2 == 0) + int2store(buff, i >> 1); + if (ptr[i] != buff[i % 2]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 2]); + return 1; + } + } + return 0; +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + uchar *buffer, uint skip) +{ + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2); + if (translog_read_record(&rec->lsn, 0, rec->record_length, buffer, NULL) != + rec->record_length) + return 1; + return check_content(buffer + skip, rec->record_length - skip); +} + +int main(int argc, char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + uchar long_tr_id[6]; + uchar lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + uchar long_buffer[LONG_BUFFER_SIZE * 2 + 7 * 2 + 2]; + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn, *lsn_ptr; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + + for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i+= 2) + { + int2store(long_buffer + i, (i >> 1)); + /* long_buffer[i]= (i & 0xFF); */ + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + exit(1); + } + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + if (i % 1000 == 0) + printf("write %d\n", i); + if (i % 2) + { + lsn7store(lsn_buff, &lsn_base); + if (translog_write_record(&lsn, + LOGREC_CLR_END, + (i % 0xFFFF), NULL, 7, lsn_buff, 0)) + { + fprintf(stderr, "1 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + lsn7store(lsn_buff, &lsn_base); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_INSERT, + (i % 0xFFFF), + NULL, 7, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "1 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + } + else + { + lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff + 7, &first_lsn); + if (translog_write_record(&lsn, + LOGREC_UNDO_ROW_DELETE, + (i % 0xFFFF), NULL, 23, lsn_buff, 0)) + { + fprintf(stderr, "0 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff + 7, &first_lsn); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_DELETE, + (i % 0xFFFF), + NULL, 14, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "0 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + } + int4store(long_tr_id, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + (i % 0xFFFF), NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + exit(1); + } + + lsn_base= lsn; + + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + (i % 0xFFFF), NULL, rec_len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + exit(1); + } + if (translog_flush(&lsn)) + { + fprintf(stderr, "Can't flush #%lu\n", (ulong) i); + translog_destroy(); + exit(1); + } + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + srandom(122334817L); + + + rc= 1; + + { + translog_size_t len= translog_read_record_header(&first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || + first_lsn.file_no != rec.lsn.file_no || + first_lsn.rec_offset != rec.lsn.rec_offset) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(0x%lx,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], + (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + lsn= first_lsn; + lsn_ptr= &first_lsn; + for (i= 1;; i++) + { + if (i % 1000 == 0) + printf("read %d\n", i); + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + goto err; + } + break; + } + lsn_ptr= NULL; /* use scanner after its + initialization */ + if (i % 2) + { + LSN ref; + lsn7korr(&ref, rec.header); + if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || + rec.record_length != 7 || ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset) + { + fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" + "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) ref.file_no, (ulong) ref.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + } + else + { + LSN ref1, ref2; + lsn7korr(&ref1, rec.header); + lsn7korr(&ref2, rec.header + 7); + if (rec.type !=LOGREC_UNDO_ROW_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1.file_no != lsn.file_no || + ref1.rec_offset != lsn.rec_offset || + ref2.file_no != first_lsn.file_no || + ref2.rec_offset != first_lsn.rec_offset || + rec.header[22] != 0x55 || rec.header[21] != 0xAA || + rec.header[20] != 0x55 || rec.header[19] != 0xAA || + rec.header[18] != 0x55 || rec.header[17] != 0xAA || + rec.header[16] != 0x55 || rec.header[15] != 0xAA || + rec.header[14] != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" + "type %u, strid %u, len %u, ref1(%u,0x%lx), " + "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) ref1.file_no, (ulong) ref1.rec_offset, + (uint) ref2.file_no, (ulong) ref2.rec_offset, + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + } + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + lsn7korr(&ref, rec.header); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + if (rec.type !=LOGREC_UNDO_KEY_INSERT || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + 7 || + len != 12 || ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset || + check_content(rec.header + 7, len - 7)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type !=LOGREC_UNDO_KEY_INSERT, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + 7, + (uint) len, + len != 12, + (uint) ref.file_no, (ulong) ref.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, + (len != 12 || ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset), + check_content(rec.header + 7, len - 7)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 7)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + } + else + { + LSN ref1, ref2; + lsn7korr(&ref1, rec.header); + lsn7korr(&ref2, rec.header + 7); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + if (rec.type !=LOGREC_UNDO_KEY_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + 14 || + len != 19 || + ref1.file_no != lsn.file_no || + ref1.rec_offset != lsn.rec_offset || + ref2.file_no != first_lsn.file_no || + ref2.rec_offset != first_lsn.rec_offset || + check_content(rec.header + 14, len - 14)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" + "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " + "ref1(%u,0x%lx), ref2(%u,0x%lx), " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (uint) ref1.file_no, (ulong) ref1.rec_offset, + (uint) ref2.file_no, (ulong) ref2.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 14)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + } + + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + rec.header[4] != 0 || rec.header[5] != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + + lsn= rec.lsn; + + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + goto err; + } + } + } + + rc= 1; +err: + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + return(test(exit_status() || rc)); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c new file mode 100644 index 00000000000..abb12faa015 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -0,0 +1,570 @@ +#include "../maria_def.h" +#include +#include + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512)) + +#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1) + +#define SHOW_DIVIDER 2 + +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 2 +/*#define ITERATIONS 63 */ + +/* +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ +/* +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(uchar *ptr, ulong length) +{ + ulong i; + uchar buff[4]; + DBUG_ENTER("check_content"); + for (i= 0; i < length; i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + if (ptr[i] != buff[i % 4]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 4]); + DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0), + (i > 16 ? 16 : i) + (i + 16 < length ? 16 : length - i)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + uchar *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + DBUG_ENTER("read_and_check_content"); + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); + if ((len= translog_read_record(&rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + DBUG_RETURN(res); +} + + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + +int main(int argc, char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + uchar long_tr_id[6]; + uchar lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + uchar *long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn, *lsn_ptr; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + + { + uchar buff[4]; + for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + long_buffer[i]= buff[i % 4]; + } + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + exit(1); + } + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + if (i % SHOW_DIVIDER == 0) + printf("write %d\n", i); + if (i % 2) + { + lsn7store(lsn_buff, &lsn_base); + if (translog_write_record(&lsn, + LOGREC_CLR_END, + (i % 0xFFFF), NULL, 7, lsn_buff, 0)) + { + fprintf(stderr, "1 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + lsn7store(lsn_buff, &lsn_base); + rec_len= get_len(); + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_INSERT, + (i % 0xFFFF), + NULL, 7, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "1 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + } + else + { + lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff + 7, &first_lsn); + if (translog_write_record(&lsn, + LOGREC_UNDO_ROW_DELETE, + (i % 0xFFFF), NULL, 23, lsn_buff, 0)) + { + fprintf(stderr, "0 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff + 7, &first_lsn); + rec_len= get_len(); + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_DELETE, + (i % 0xFFFF), + NULL, 14, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "0 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + exit(1); + } + } + int4store(long_tr_id, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + (i % 0xFFFF), NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + exit(1); + } + + lsn_base= lsn; + + rec_len= get_len(); + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + (i % 0xFFFF), NULL, rec_len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + exit(1); + } + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + + rc= 1; + + { + translog_size_t len= translog_read_record_header(&first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || + first_lsn.file_no != rec.lsn.file_no || + first_lsn.rec_offset != rec.lsn.rec_offset) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(0x%lx,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], + (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + translog_free_record_header(&rec); + lsn= first_lsn; + lsn_ptr= &first_lsn; + for (i= 1;; i++) + { + if (i % SHOW_DIVIDER == 0) + printf("read %d\n", i); + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + break; + } + lsn_ptr= NULL; /* use scanner after its + initialization */ + + if (i % 2) + { + LSN ref; + lsn7korr(&ref, rec.header); + if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || + rec.record_length != 7 || ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset) + { + fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" + "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) ref.file_no, (ulong) ref.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + lsn7korr(&ref1, rec.header); + lsn7korr(&ref2, rec.header + 7); + if (rec.type !=LOGREC_UNDO_ROW_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1.file_no != lsn.file_no || + ref1.rec_offset != lsn.rec_offset || + ref2.file_no != first_lsn.file_no || + ref2.rec_offset != first_lsn.rec_offset || + rec.header[22] != 0x55 || rec.header[21] != 0xAA || + rec.header[20] != 0x55 || rec.header[19] != 0xAA || + rec.header[18] != 0x55 || rec.header[17] != 0xAA || + rec.header[16] != 0x55 || rec.header[15] != 0xAA || + rec.header[14] != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" + "type %u, strid %u, len %u, ref1(%u,0x%lx), " + "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) ref1.file_no, (ulong) ref1.rec_offset, + (uint) ref2.file_no, (ulong) ref2.rec_offset, + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + translog_free_record_header(&rec); + + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + lsn7korr(&ref, rec.header); + rec_len= get_len(); + if (rec.type !=LOGREC_UNDO_KEY_INSERT || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + 7 || + len != 12 || ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset || + check_content(rec.header + 7, len - 7)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type !=LOGREC_UNDO_KEY_INSERT, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + 7, + (uint) len, + len != 12, + (uint) ref.file_no, (ulong) ref.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, + (ref.file_no != lsn.file_no || + ref.rec_offset != lsn.rec_offset), + check_content(rec.header + 7, len - 7)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 7)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + lsn7korr(&ref1, rec.header); + lsn7korr(&ref2, rec.header + 7); + rec_len= get_len(); + if (rec.type !=LOGREC_UNDO_KEY_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + 14 || + len != 19 || + ref1.file_no != lsn.file_no || + ref1.rec_offset != lsn.rec_offset || + ref2.file_no != first_lsn.file_no || + ref2.rec_offset != first_lsn.rec_offset || + check_content(rec.header + 14, len - 14)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" + "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " + "ref1(%u,0x%lx), ref2(%u,0x%lx), " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (uint) ref1.file_no, (ulong) ref1.rec_offset, + (uint) ref2.file_no, (ulong) ref2.rec_offset, + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 14)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + translog_free_record_header(&rec); + + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + rec.header[4] != 0 || rec.header[5] != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + translog_free_record_header(&rec); + + lsn= rec.lsn; + + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + rec_len= get_len(); + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%u,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + } + + rc= 0; +err: + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + return (test(exit_status() || rc)); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c new file mode 100644 index 00000000000..794dc6dd255 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -0,0 +1,468 @@ +#include "../maria_def.h" +#include +#include + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */ +#define LOG_FLAGS 0 +/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/ +#define LONG_BUFFER_SIZE (1024L*1024L*1024L) +#define MIN_REC_LENGTH 30 +#define SHOW_DIVIDER 10 +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 3 +#define WRITERS 3 +static uint number_of_writers= WRITERS; + +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; + +static ulong lens[WRITERS][ITERATIONS]; +static LSN lsns1[WRITERS][ITERATIONS]; +static LSN lsns2[WRITERS][ITERATIONS]; +static uchar *long_buffer; + +/* + Get pseudo-random length of the field in + limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE] + + SYNOPSYS + get_len() + + RETURN + length - length >= 0 length <= LONG_BUFFER_SIZE +*/ + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(uchar *ptr, ulong length) +{ + ulong i; + for (i= 0; i < length; i++) + { + if (ptr[i] != (i & 0xFF)) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) (i & 0xFF)); + return 1; + } + } + return 0; +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + uchar *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + DBUG_ENTER("read_and_check_content"); + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); + if ((len= translog_read_record(&rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + DBUG_RETURN(res); +} + +void writer(int num) +{ + LSN lsn; + uchar long_tr_id[6]; + uint i; + DBUG_ENTER("writer"); + + for (i= 0; i < ITERATIONS; i++) + { + uint len= get_len(); + lens[num][i]= len; + + int2store(long_tr_id, num); + int4store(long_tr_id + 2, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + num, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write LOGREC_LONG_TRANSACTION_ID record #%lu " + "thread %i\n", (ulong) i, num); + translog_destroy(); + return; + } + lsns1[num][i]= lsn; + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + num, NULL, len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + return; + } + lsns2[num][i]= lsn; + DBUG_PRINT("info", ("thread: %u, iteration: %u, len: %lu, " + "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)", + num, i, (ulong) lens[num][i], + (ulong) lsns1[num][i].file_no, + (ulong) lsns1[num][i].rec_offset, + (ulong) lsns2[num][i].file_no, + (ulong) lsns2[num][i].rec_offset)); + printf("thread: %u, iteration: %u, len: %lu, " + "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)\n", + num, i, (ulong) lens[num][i], + (ulong) lsns1[num][i].file_no, + (ulong) lsns1[num][i].rec_offset, + (ulong) lsns2[num][i].file_no, (ulong) lsns2[num][i].rec_offset); + } + DBUG_VOID_RETURN; +} + + +static void *test_thread_writer(void *arg) +{ + int param= *((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_writer"); + DBUG_PRINT("enter", ("param: %d", param)); + + writer(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are + ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + + +int main(int argc, char **argv __attribute__ ((unused))) +{ + uint32 i; + uint pagen; + PAGECACHE pagecache; + LSN first_lsn, *lsn_ptr; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error; + int rc; + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); + if (long_buffer == 0) + { + fprintf(stderr, "End of memory\n"); + exit(1); + } + for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) + long_buffer[i]= (i & 0xFF); + + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr, "Got error: %d from pthread_attr_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error, errno); + exit(1); + } +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) + { + fprintf(stderr, "Got error: %d from pthread_attr_setstacksize " + "(errno: %d)\n", error, errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + { + uchar long_tr_id[6]= + { + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 + }; + + if (translog_write_record(&first_lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write the first record\n"); + translog_destroy(); + exit(1); + } + } + + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock " + "(errno: %d)\n", error, errno); + exit(1); + } + + while (number_of_writers != 0) + { + param= (int*) malloc(sizeof(int)); + *param= number_of_writers - 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n", + error, errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + DBUG_PRINT("info", ("All threads are started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count))) + fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error); + DBUG_PRINT("info", ("All threads ended")); + + /* Find last LSN and flush up to it (all our log) */ + { + LSN max= + { + 0, 0 + }; + for (i= 0; i < WRITERS; i++) + { + if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0) + max= lsns2[i][ITERATIONS - 1]; + } + DBUG_PRINT("info", ("first lsn: (%lu,0x%lx), max lsn: (%lu,0x%lx)", + (ulong) first_lsn.file_no, + (ulong) first_lsn.rec_offset, + (ulong) max.file_no, (ulong) max.rec_offset)); + translog_flush(&max); + } + + rc= 1; + + { + uint indeces[WRITERS]; + uint index, len, stage; + bzero(indeces, sizeof(uint) * WRITERS); + + bzero(indeces, sizeof(indeces)); + + lsn_ptr= &first_lsn; + for (i= 0;; i++) + { + len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + lsn_ptr= NULL; + + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + if (i != WRITERS * ITERATIONS * 2) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS * WRITERS * 2); + translog_free_record_header(&rec); + goto err; + } + break; + } + index= indeces[rec.short_trid] / 2; + stage= indeces[rec.short_trid] % 2; + printf("read(%d) thread: %d, iteration %d, stage %d\n", + i, (uint) rec.short_trid, index, stage); + if (stage == 0) + { + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.record_length != 6 || + uint2korr(rec.header) != rec.short_trid || + index != uint4korr(rec.header + 2) || + cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u %u, len %u, i: %u %u, " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + i, (uint) rec.type, + (uint) rec.short_trid, (uint) uint2korr(rec.header), + (uint) rec.record_length, + (uint) index, (uint) uint4korr(rec.header + 2), + (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, + (ulong) lsns1[rec.short_trid][index].file_no, + (ulong) lsns1[rec.short_trid][index].rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + else + { + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + len != 9 || + rec.record_length != lens[rec.short_trid][index] || + cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 || + check_content(rec.header, len)) + { + fprintf(stderr, + "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d) " + " thread: %d, iteration %d, stage %d\n" + "type %u (%d), len %u, length %lu %lu (%d) " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + i, (uint) rec.short_trid, index, stage, + (uint) rec.type, (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD), + (uint) len, + (ulong) rec.record_length, lens[rec.short_trid][index], + (rec.record_length != lens[rec.short_trid][index]), + (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, + (ulong) lsns2[rec.short_trid][index].file_no, + (ulong) lsns2[rec.short_trid][index].rec_offset); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read " + "lsn(%u,0x%lx)\n", + (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + translog_free_record_header(&rec); + goto err; + } + } + translog_free_record_header(&rec); + indeces[rec.short_trid]++; + } + } + + rc= 0; +err: + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + DBUG_RETURN(test(exit_status() || rc)); +} diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c new file mode 100644 index 00000000000..6771b5f888d --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -0,0 +1,140 @@ +#include "../maria_def.h" +#include +#include + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) +#define PCACHE_PAGE TRANSLOG_PAGE_SIZE +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define LOG_FLAGS 0 + +static char *first_translog_file= (char*)"maria_log.00000001"; +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; + +int main(int argc, char *argv[]) +{ + uint pagen; + uchar long_tr_id[6]; + PAGECACHE pagecache; + LSN lsn; + MY_STAT st, *stat; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + /* be sure that we have no logs in the directory*/ + if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0))) + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + if (my_stat(first_translog_file, &st, MYF(0))) + my_delete(first_translog_file, MYF(0)); + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PCACHE_PAGE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE); + exit(1); + } + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + exit(1); + } + + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + + { + uchar page[PCACHE_PAGE]; + + bzero(page, PCACHE_PAGE); +#define PAGE_LSN_OFFSET 0 + lsn7store(page + PAGE_LSN_OFFSET, &lsn); + pagecache_write(&pagecache, &file1, 0, 3, (char*)page, + PAGECACHE_LSN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + } + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE * 2) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, + (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2)); + exit(1); + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + my_delete(first_translog_file, MYF(0)); + my_delete(file1_name, MYF(0)); + + exit(0); +} diff --git a/unittest/mysys/mf_pagecache_consist.c b/storage/maria/unittest/mf_pagecache_consist.c similarity index 95% rename from unittest/mysys/mf_pagecache_consist.c rename to storage/maria/unittest/mf_pagecache_consist.c index f7724222a09..9389e5a093c 100755 --- a/unittest/mysys/mf_pagecache_consist.c +++ b/storage/maria/unittest/mf_pagecache_consist.c @@ -57,6 +57,29 @@ static uint flush_divider= 1000; #endif /*TEST_HIGH_CONCURENCY*/ +/* + Get pseudo-random length of the field in (0;limit) + + SYNOPSYS + get_len() + limit limit for generated value + + RETURN + length where length >= 0 & length < limit +*/ + +static uint get_len(uint limit) +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / limit); + } while (rec_len >= limit || rec_len == 0); + return rec_len; +} + + /* check page consistency */ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, int tag) @@ -70,7 +93,7 @@ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, { uint len= *((uint *)(buff + end)); uint j; - end+= sizeof(uint)+ sizeof(uint); + end+= sizeof(uint) + sizeof(uint); if (len + end > PAGE_SIZE) { diag("incorrect field header #%u by offset %lu\n", i, offset + end + j); @@ -178,7 +201,7 @@ void reader(int num) for (i= 0; i < number_of_tests; i++) { - uint page= rand()/(RAND_MAX/number_of_pages); + uint page= get_len(number_of_pages); pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, @@ -201,13 +224,13 @@ void writer(int num) for (i= 0; i < number_of_tests; i++) { uint end; - uint page= rand()/(RAND_MAX/number_of_pages); + uint page= get_len(number_of_pages); pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, 0); end= check_page(buffr, page * PAGE_SIZE, 1, page, num); - put_rec(buffr, end, rand()/(RAND_MAX/record_length_limit), num); + put_rec(buffr, end, get_len(record_length_limit), num); pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE_UNLOCK, @@ -348,7 +371,7 @@ int main(int argc, char **argv __attribute__((unused))) if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, - PAGE_SIZE, 0)) == 0) + PAGE_SIZE)) == 0) { fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", errno); diff --git a/unittest/mysys/mf_pagecache_single.c b/storage/maria/unittest/mf_pagecache_single.c similarity index 99% rename from unittest/mysys/mf_pagecache_single.c rename to storage/maria/unittest/mf_pagecache_single.c index 49ecd2986ab..3c4a3642fe9 100644 --- a/unittest/mysys/mf_pagecache_single.c +++ b/storage/maria/unittest/mf_pagecache_single.c @@ -346,7 +346,7 @@ int simple_big_test() unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); struct file_desc *desc= - (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2)) * + (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) * sizeof(struct file_desc)); int res, i; DBUG_ENTER("simple_big_test"); @@ -363,6 +363,8 @@ int simple_big_test() PAGECACHE_WRITE_DELAY, 0); } + desc[i].length= 0; + desc[i].content= NULL; ok(1, "Simple big file write"); /* check written pages sequentally read */ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) @@ -528,7 +530,7 @@ int main(int argc, char **argv __attribute__((unused))) plan(12); if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, - PAGE_SIZE, 0)) == 0) + PAGE_SIZE)) == 0) { fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", errno); diff --git a/unittest/mysys/test_file.c b/storage/maria/unittest/test_file.c similarity index 100% rename from unittest/mysys/test_file.c rename to storage/maria/unittest/test_file.c diff --git a/unittest/mysys/test_file.h b/storage/maria/unittest/test_file.h similarity index 100% rename from unittest/mysys/test_file.h rename to storage/maria/unittest/test_file.h diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index 9b230272329..a32dbb9b0c3 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -7,45 +7,4 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a -noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \ - mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ - mf_pagecache_single_64k-t \ - mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \ - mf_pagecache_consist_1kHC-t mf_pagecache_consist_64kHC-t \ - mf_pagecache_consist_1kRD-t mf_pagecache_consist_64kRD-t \ - mf_pagecache_consist_1kWR-t mf_pagecache_consist_64kWR-t - -# tests for mysys/mf_pagecache.c - -mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c -mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c -mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN - -mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 -mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 - -mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 - -mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY -mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY - -mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS -mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS - -mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS -mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS - -CLEANFILES = my_pagecache_debug.log page_cache_test_file_1 +noinst_PROGRAMS = bitmap-t base64-t my_atomic-t From 91a8199773a8ee6b4d5a00b337d9b49a69dfc1ea Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 12 Feb 2007 14:23:43 +0200 Subject: [PATCH 88/95] Postmerge fix (including changing type of LSN) Some debug info and comments added include/pagecache.h: postmerge fix mysys/mf_pagecache.c: Postmerge fix (including changing type of LSN) Additional DBUG_ASSERTs added Comment about pinning mechanism added storage/maria/ma_control_file.c: Used the same LSN storing procedure everywhere Postmerge fix (including changing type of LSN) storage/maria/ma_control_file.h: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler.c: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler.h: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler_lsn.h: Postmerge fix (including changing type of LSN) storage/maria/unittest/Makefile.am: Postmerge fix storage/maria/unittest/ma_control_file-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_multithread-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/mf_pagecache_consist.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/mf_pagecache_single.c: Postmerge fix (including changing type of LSN) --- include/pagecache.h | 14 +- mysys/mf_pagecache.c | 158 ++-- storage/maria/ma_control_file.c | 34 +- storage/maria/ma_control_file.h | 10 +- storage/maria/ma_loghandler.c | 827 +++++++++--------- storage/maria/ma_loghandler.h | 8 +- storage/maria/ma_loghandler_lsn.h | 41 +- storage/maria/unittest/Makefile.am | 56 +- storage/maria/unittest/ma_control_file-t.c | 57 +- storage/maria/unittest/ma_test_loghandler-t.c | 136 ++- .../ma_test_loghandler_multigroup-t.c | 133 ++- .../ma_test_loghandler_multithread-t.c | 59 +- .../unittest/ma_test_loghandler_pagecache-t.c | 3 +- storage/maria/unittest/mf_pagecache_consist.c | 1 + storage/maria/unittest/mf_pagecache_single.c | 5 +- 15 files changed, 789 insertions(+), 753 deletions(-) diff --git a/include/pagecache.h b/include/pagecache.h index b507c1fb4ae..32e23fb54ea 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -21,6 +21,7 @@ C_MODE_START #include "../storage/maria/ma_loghandler_lsn.h" +#include /* Type of the page */ enum pagecache_page_type @@ -73,8 +74,6 @@ enum pagecache_write_mode typedef void *PAGECACHE_PAGE_LINK; -typedef uint64 LSN; - /* file descriptor for Maria */ typedef struct st_pagecache_file { @@ -152,8 +151,8 @@ typedef struct st_pagecache ulong param_division_limit; /* min. percentage of warm blocks */ ulong param_age_threshold; /* determines when hot block is downgraded */ - /* Statistics variables. These are reset in reset_key_cache_counters(). */ - ulong global_blocks_changed; /* number of currently dirty blocks */ + /* Statistics variables. These are reset in reset_pagecache_counters(). */ + ulong global_blocks_changed; /* number of currently dirty blocks */ ulonglong global_cache_w_requests;/* number of write requests (write hits) */ ulonglong global_cache_write; /* number of writes from cache to files */ ulonglong global_cache_r_requests;/* number of read requests (read hits) */ @@ -200,14 +199,12 @@ extern void pagecache_unlock_page(PAGECACHE *pagecache, pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page); + LSN first_REDO_LSN_for_page); extern void pagecache_unlock(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page); + LSN first_REDO_LSN_for_page); extern void pagecache_unpin_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, pgcache_page_no_t pageno); @@ -225,6 +222,7 @@ extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, LEX_STRING *str, LSN *max_lsn); +extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache); C_MODE_END #endif /* _keycache_h */ diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3773119109a..9ef3d5841b5 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -538,7 +538,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); #define pagecache_pthread_cond_signal pthread_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ -extern my_bool translog_flush(LSN *lsn); +extern my_bool translog_flush(LSN lsn); /* Write page to the disk @@ -570,13 +570,13 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ #define PAGE_LSN_OFFSET 0 - lsn7korr(&lsn, buffer + PAGE_LSN_OFFSET); + lsn= lsn7korr(buffer + PAGE_LSN_OFFSET); /* check CONTROL_FILE_IMPOSSIBLE_FILENO & CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET */ - DBUG_ASSERT(lsn.file_no != 0 && lsn.rec_offset != 0); - translog_flush(&lsn); + DBUG_ASSERT(lsn != 0); + translog_flush(lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, (pageno)<<(pagecache->shift), flags)); @@ -1354,6 +1354,7 @@ static void unreg_request(PAGECACHE *pagecache, (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests)); BLOCK_INFO(block); + DBUG_ASSERT(block->requests > 0); if (! --block->requests) { my_bool hot; @@ -1400,6 +1401,7 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { DBUG_ENTER("remove_reader"); BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); #ifdef THREAD if (! --block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); @@ -1722,12 +1724,14 @@ restart: if (page_status != PAGE_READ) { /* We don't need the page in the cache: we are going to write on disk */ + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; unlink_hash(pagecache, hash_link); return 0; } if (!(block->status & BLOCK_IN_FLUSH)) { + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; /* Remove block to invalidate the page in the block buffer @@ -1744,6 +1748,7 @@ restart: return 0; } /* Wait until the page is flushed on disk */ + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; { #ifdef THREAD @@ -1795,6 +1800,7 @@ restart: } else { + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; KEYCACHE_DBUG_PRINT("find_block", ("request waiting for old page to be saved")); @@ -1851,8 +1857,8 @@ restart: pagecache->blocks_used++; } pagecache->blocks_unused--; - DBUG_ASSERT((block->status & BLOCK_WRLOCK)); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -1911,18 +1917,19 @@ restart: block->hits_left= init_hits_left; block->last_hit_time= 0; if (reg_req) - reg_requests(pagecache, block,1); + reg_requests(pagecache, block, 1); hash_link->block= block; } + BLOCK_INFO(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); if (block->hash_link != hash_link && ! (block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); KEYCACHE_DBUG_PRINT("find_block", @@ -2289,9 +2296,9 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache, retry: DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); BLOCK_INFO(block); - DBUG_ASSERT(block->hash_link->requests != 0); + DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; - DBUG_ASSERT(block->requests != 0); + DBUG_ASSERT(block->requests > 0); unreg_request(pagecache, block, 1); BLOCK_INFO(block); DBUG_RETURN(1); @@ -2412,8 +2419,19 @@ static void read_block(PAGECACHE *pagecache, pageno number of the block of data in the file lock lock change pin pin page - stamp_this_page put LSN stamp on the page - first_REDO_LSN_for_page + first_REDO_LSN_for_page do not set it if it is zero + + NOTE + Pininig uses requests registration mechanism it works following way: + | beginnig | ending | + | of func. | of func. | + ----------------------------+-------------+---------------+ + PAGECACHE_PIN_LEFT_PINNED | - | - | + PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request | + PAGECACHE_PIN | reg request | - | + PAGECACHE_UNPIN | - | unreg request | + + */ void pagecache_unlock_page(PAGECACHE *pagecache, @@ -2421,8 +2439,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page) + LSN first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2444,15 +2461,15 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); + /* See NOTE for pagecache_unlock_page about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, + test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st); BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); - if (stamp_this_page) + if (first_REDO_LSN_for_page) { DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); - /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2472,7 +2489,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ if (pin != PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); @@ -2504,7 +2522,6 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ENTER("pagecache_unpin_page"); DBUG_PRINT("enter", ("fd: %u page: %lu", (uint) file->file, (ulong) pageno)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* As soon as we keep lock cache can be used, and we have lock bacause want @@ -2513,6 +2530,7 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock_page about registering requests */ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); @@ -2539,7 +2557,8 @@ void pagecache_unpin_page(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests */ unreg_request(pagecache, block, 1); @@ -2561,16 +2580,14 @@ void pagecache_unpin_page(PAGECACHE *pagecache, link direct link to page (returned by read or write) lock lock change pin pin page - stamp_this_page put LSN stamp on the page - first_REDO_LSN_for_page + first_REDO_LSN_for_page do not set it if it is zero */ void pagecache_unlock(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page) + LSN first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unlock"); @@ -2580,8 +2597,12 @@ void pagecache_unlock(PAGECACHE *pagecache, (ulong) block->hash_link->pageno, page_cache_page_lock_str[lock], page_cache_page_pin_str[pin])); - /* we do not allow any lock/pin increasing here */ + /* + We do not allow any lock/pin increasing here and page can't be + unpinned because we use direct link. + */ DBUG_ASSERT(pin != PAGECACHE_PIN && + pin != PAGECACHE_PIN_LEFT_UNPINNED && lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE); if (pin == PAGECACHE_PIN_LEFT_UNPINNED && @@ -2611,12 +2632,10 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - if (stamp_this_page) + if (first_REDO_LSN_for_page) { DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); - /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2636,7 +2655,8 @@ void pagecache_unlock(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ if (pin != PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); @@ -2701,7 +2721,8 @@ void pagecache_unpin(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ unreg_request(pagecache, block, 1); @@ -2790,10 +2811,11 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; + /* See NOTE for pagecache_unlock_page about registering requests. */ block= find_block(pagecache, file, pageno, level, test(lock == PAGECACHE_LOCK_WRITE), - test((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); @@ -2836,9 +2858,10 @@ restart: remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ - if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); else *link= (PAGECACHE_PAGE_LINK)block; @@ -2898,6 +2921,8 @@ my_bool pagecache_delete_page(PAGECACHE *pagecache, page_cache_page_pin_str[pin])); DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + DBUG_ASSERT(pin == PAGECACHE_PIN || + pin == PAGECACHE_PIN_LEFT_PINNED); restart: @@ -2920,6 +2945,9 @@ restart: DBUG_RETURN(0); } block= link->block; + /* See NOTE for pagecache_unlock_page about registering requests. */ + if (pin == PAGECACHE_PIN) + reg_requests(pagecache, block, 1); DBUG_ASSERT(block != 0); if (make_lock_and_pin(pagecache, block, lock, pin)) { @@ -2972,9 +3000,9 @@ restart: make_lock_and_pin(pagecache, block, PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN); + DBUG_ASSERT(link->requests > 0); link->requests--; - if (pin == PAGECACHE_PIN_LEFT_PINNED) - unreg_request(pagecache, block, 1); + /* See NOTE for pagecache_unlock_page about registering requests. */ free_block(pagecache, block); err: @@ -3119,16 +3147,15 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_w_requests++; - { - int need_wrlock= (write_mode != PAGECACHE_WRITE_DONE && - lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && - lock != PAGECACHE_LOCK_WRITE_UNLOCK && - lock != PAGECACHE_LOCK_WRITE_TO_READ); - block= find_block(pagecache, file, pageno, level, - (need_wrlock ? 1 : 0), - (need_wrlock ? 1 : 0), - &page_st); - } + /* See NOTE for pagecache_unlock_page about registering requests. */ + block= find_block(pagecache, file, pageno, level, + test(write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); @@ -3212,12 +3239,11 @@ restart: } /* Unregister the request */ - + DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; - if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) - { + /* See NOTE for pagecache_unlock_page about registering requests. */ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); - } else *link= (PAGECACHE_PAGE_LINK)block; @@ -3280,7 +3306,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) unlink_changed(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -3361,7 +3387,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache, } /* if the block is not pinned then it is not write locked */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); #ifndef DBUG_OFF { int rc= @@ -3686,33 +3712,33 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, Reset the counters of a key cache. SYNOPSIS - reset_key_cache_counters() + reset_pagecache_counters() name the name of a key cache - key_cache pointer to the key kache to be reset + pagecache pointer to the pagecache to be reset DESCRIPTION - This procedure is used by process_key_caches() to reset the counters of all - currently used key caches, both the default one and the named ones. + This procedure is used to reset the counters of all currently used key + caches, both the default one and the named ones. RETURN 0 on success (always because it can't fail) */ -static int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +int reset_pagecache_counters(const char *name, PAGECACHE *pagecache) { - DBUG_ENTER("reset_key_cache_counters"); - if (!key_cache->inited) + DBUG_ENTER("reset_pagecache_counters"); + if (!pagecache->inited) { DBUG_PRINT("info", ("Key cache %s not initialized.", name)); DBUG_RETURN(0); } DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); - key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ - key_cache->global_cache_r_requests= 0; /* Key_read_requests */ - key_cache->global_cache_read= 0; /* Key_reads */ - key_cache->global_cache_w_requests= 0; /* Key_write_requests */ - key_cache->global_cache_write= 0; /* Key_writes */ + pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + pagecache->global_cache_r_requests= 0; /* Key_read_requests */ + pagecache->global_cache_read= 0; /* Key_reads */ + pagecache->global_cache_w_requests= 0; /* Key_write_requests */ + pagecache->global_cache_write= 0; /* Key_writes */ DBUG_RETURN(0); } diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 47583466cd7..3f9af34b2f1 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -37,7 +37,7 @@ #define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) #define CONTROL_FILE_CHECKSUM_SIZE 1 #define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE) -#define CONTROL_FILE_LSN_SIZE (4+4) +#define CONTROL_FILE_LSN_SIZE (3+4) #define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) #define CONTROL_FILE_FILENO_SIZE 4 #define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) @@ -59,20 +59,6 @@ uint32 last_logno; */ static int control_file_fd= -1; -static void lsn8store(char *buffer, const LSN *lsn) -{ - int4store(buffer, lsn->file_no); - int4store(buffer + CONTROL_FILE_FILENO_SIZE, lsn->rec_offset); -} - -static LSN lsn8korr(char *buffer) -{ - LSN tmp; - tmp.file_no= uint4korr(buffer); - tmp.rec_offset= uint4korr(buffer + CONTROL_FILE_FILENO_SIZE); - return tmp; -} - static char simple_checksum(char *buffer, uint size) { /* TODO: improve this sum if we want */ @@ -120,7 +106,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() "*store" and "*korr" calls in this file, and can even create backward compatibility problems. Beware! */ - DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (4+4)); + DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (3+4)); DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4); if (control_file_fd >= 0) /* already open */ @@ -154,11 +140,9 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() usable as soon as it has been written to the log). */ - LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - uint32 imposs_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; - /* init the file with these "undefined" values */ - DBUG_RETURN(ma_control_file_write_and_force(&imposs_lsn, imposs_logno, + DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, + CONTROL_FILE_IMPOSSIBLE_FILENO, CONTROL_FILE_UPDATE_ALL)); } @@ -216,7 +200,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() error= CONTROL_FILE_BAD_CHECKSUM; goto err; } - last_checkpoint_lsn= lsn8korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_checkpoint_lsn= lsn7korr(buffer + CONTROL_FILE_LSN_OFFSET); last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); DBUG_RETURN(0); @@ -253,7 +237,7 @@ err: 1 - Error */ -int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, uint objs_to_write) { char buffer[CONTROL_FILE_SIZE]; @@ -277,9 +261,9 @@ int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, DBUG_ASSERT(0); if (update_checkpoint_lsn) - lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); + lsn7store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); else /* store old value == change nothing */ - lsn8store(buffer + CONTROL_FILE_LSN_OFFSET, &last_checkpoint_lsn); + lsn7store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn); if (update_logno) int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno); @@ -297,7 +281,7 @@ int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, /* TODO: you need some protection to be able to write last_* global vars */ if (update_checkpoint_lsn) - last_checkpoint_lsn= *checkpoint_lsn; + last_checkpoint_lsn= checkpoint_lsn; if (update_logno) last_logno= logno; diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 4b5ddd006c1..8e5dafac24c 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -30,16 +30,14 @@ #define CONTROL_FILE_IMPOSSIBLE_FILENO 0 /* logs always have a header */ #define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 -/* - indicate absence of LSN. -*/ -#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN){CONTROL_FILE_IMPOSSIBLE_FILENO,CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET}) +/* indicate absence of LSN. */ +#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0) /* Here is the interface of this module */ /* LSN of the last checkoint - (if last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO + (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN then there was never a checkpoint) */ extern LSN last_checkpoint_lsn; @@ -72,7 +70,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open(); #define CONTROL_FILE_UPDATE_ALL 0 #define CONTROL_FILE_UPDATE_ONLY_LSN 1 #define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 -int ma_control_file_write_and_force(const LSN *checkpoint_lsn, uint32 logno, +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, uint objs_to_write); diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index d5c19e29ce2..01b4c68f12f 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -366,7 +366,7 @@ const char *maria_data_root; pointer to path */ -char *translog_filename_by_fileno(uint32 file_no, char *path) +static char *translog_filename_by_fileno(uint32 file_no, char *path) { char file_name[10 + 8 + 1]; char *res; @@ -391,7 +391,7 @@ char *translog_filename_by_fileno(uint32 file_no, char *path) file descriptor number */ -File open_logfile_by_number_no_cache(uint32 file_no) +static File open_logfile_by_number_no_cache(uint32 file_no) { File file; char path[FN_REFLEN]; @@ -421,7 +421,7 @@ File open_logfile_by_number_no_cache(uint32 file_no) 1 ERROR */ -my_bool translog_write_file_header() +static my_bool translog_write_file_header() { ulonglong timestamp; char page[TRANSLOG_PAGE_SIZE]; @@ -441,7 +441,8 @@ my_bool translog_write_file_header() /* loghandler page size/512 */ int2store(page + (8 + 8 + 4 + 4 + 4), TRANSLOG_PAGE_SIZE / 512); /* file number */ - int3store(page + (8 + 8 + 4 + 4 + 4 + 2), log_descriptor.horizon.file_no); + int3store(page + (8 + 8 + 4 + 4 + 4 + 2), + LSN_FILE_NO(log_descriptor.horizon)); bzero(page + (8 + 8 + 4 + 4 + 4 + 2 + 3), TRANSLOG_PAGE_SIZE - (8 + 8 + 4 + 4 + 4 + 2 + 3)); @@ -466,12 +467,11 @@ my_bool translog_write_file_header() 1 - Error */ -my_bool translog_buffer_init(struct st_translog_buffer *buffer) +static my_bool translog_buffer_init(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_init"); /* This buffer offset */ - buffer->last_lsn.file_no= buffer->offset.file_no= 0; - buffer->last_lsn.rec_offset= buffer->offset.rec_offset= 0; + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; /* This Buffer File */ buffer->file= 0; buffer->overlay= 0; @@ -528,9 +528,10 @@ static my_bool translog_close_log_file(File file) 1 Error */ -my_bool translog_create_new_file() +static my_bool translog_create_new_file() { int i; + uint32 file_no= LSN_FILE_NO(log_descriptor.horizon); DBUG_ENTER("translog_create_new_file"); @@ -544,11 +545,11 @@ my_bool translog_create_new_file() } if ((log_descriptor.log_file_num[0]= - open_logfile_by_number_no_cache(log_descriptor.horizon.file_no)) <= 0 || + open_logfile_by_number_no_cache(file_no)) <= 0 || translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no, + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); @@ -657,31 +658,32 @@ static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, ptr= cursor->ptr; /* Page number */ - int3store(ptr, horizon->rec_offset / TRANSLOG_PAGE_SIZE); - ptr +=3; + int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE); + ptr+= 3; /* File number */ - int3store(ptr, horizon->file_no); - ptr +=3; + int3store(ptr, LSN_FILE_NO(*horizon)); + ptr+= 3; *(ptr ++)= (uchar) log_descriptor.flags; if (log_descriptor.flags & TRANSLOG_PAGE_CRC) { #ifndef DBUG_OFF DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)", - (ulong) horizon->file_no, (ulong) horizon->rec_offset)); + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon))); int4store(ptr, 0x11223344); #endif - ptr +=4; /* CRC will be put when page - will be finished */ + /* CRC will be put when page will be finished */ + ptr+= 4; } if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) { time_t tm; int2store(ptr, time(&tm) & 0xFFFF); - ptr +=(TRANSLOG_PAGE_SIZE / 512) * 2; + ptr+= (TRANSLOG_PAGE_SIZE / 512) * 2; } { uint len= (ptr -cursor->ptr); - horizon->rec_offset+= len; + *horizon+= len; /* it is increasing of offset part of the address */ cursor->current_page_size= len; if (!cursor->chaser) cursor->buffer->size+= len; @@ -819,10 +821,10 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, "Page addr: (%lu,0x%lx), " "size %lu (%lu), Pg: %u, left: %u", (uint) cursor->buffer_no, (ulong) cursor->buffer, - (ulong) cursor->buffer->offset.file_no, - (ulong) cursor->buffer->offset.rec_offset, - (ulong) horizon->file_no, - (ulong) (horizon->rec_offset - + (ulong) LSN_FILE_NO(cursor->buffer->offset), + (ulong) LSN_OFFSET(cursor->buffer->offset), + (ulong) LSN_FILE_NO(*horizon), + (ulong) (LSN_OFFSET(*horizon) - cursor->current_page_size), (ulong) cursor->buffer->size, (ulong) (cursor->ptr -cursor->buffer->buffer), @@ -830,9 +832,9 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, DBUG_ASSERT(cursor->ptr !=NULL); DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == cursor->current_page_size % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(horizon->file_no == cursor->buffer->offset.file_no); - DBUG_ASSERT(cursor->buffer->offset.rec_offset + - (cursor->ptr -cursor->buffer->buffer) == horizon->rec_offset); + DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); + DBUG_ASSERT(LSN_OFFSET(cursor->buffer->offset) + + (cursor->ptr -cursor->buffer->buffer) == LSN_OFFSET(*horizon)); if (cursor->protected) { DBUG_PRINT("info", ("Already protected and finished")); @@ -843,7 +845,7 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, DBUG_PRINT("info", ("left %u", (uint) left)); bzero(cursor->ptr, left); cursor->ptr +=left; - horizon->rec_offset+= left; + *horizon+= left; /* offset increasing */ if (!cursor->chaser) cursor->buffer->size+= left; cursor->current_page_size= 0; @@ -888,6 +890,7 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, NOTE This buffer should be locked */ + static void translog_wait_for_writers(struct st_translog_buffer *buffer) { struct st_my_thread_var *thread; @@ -908,7 +911,8 @@ static void translog_wait_for_writers(struct st_translog_buffer *buffer) DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " "mutex: 0x%lx", - thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) thread, + (uint) buffer->buffer_no, (ulong) buffer, (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); #ifndef DBUG_OFF @@ -920,7 +924,8 @@ static void translog_wait_for_writers(struct st_translog_buffer *buffer) DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " "mutex: 0x%lx", - thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) thread, + (uint) buffer->buffer_no, (ulong) buffer, (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); #ifndef DBUG_OFF @@ -966,7 +971,8 @@ static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " "mutex: 0x%lx", - thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) thread, + (uint) buffer->buffer_no, (ulong) buffer, (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); #ifndef DBUG_OFF @@ -978,7 +984,8 @@ static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " "mutex: 0x%lx", - thread, (uint) buffer->buffer_no, (ulong) buffer, + (ulong) thread, + (uint) buffer->buffer_no, (ulong) buffer, (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); #ifndef DBUG_OFF @@ -1027,6 +1034,7 @@ static void translog_cursor_init(struct st_buffer_cursor *cursor, cursor It's cursor buffer_no Number of buffer */ + static void translog_start_buffer(struct st_translog_buffer *buffer, struct st_buffer_cursor *cursor, uint8 buffer_no) @@ -1036,11 +1044,10 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, ("Assign buffer #%u (0x%lx) to file %u, offset 0x%lx(%lu)", (uint) buffer->buffer_no, (ulong) buffer, (uint) log_descriptor.log_file_num[0], - (ulong) log_descriptor.horizon.rec_offset, - (ulong) log_descriptor.horizon.rec_offset)); + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT(buffer_no == buffer->buffer_no); - buffer->last_lsn.file_no= 0; - buffer->last_lsn.rec_offset= 0; + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; buffer->offset= log_descriptor.horizon; buffer->file= log_descriptor.log_file_num[0]; buffer->overlay= 0; @@ -1086,9 +1093,9 @@ static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, my_bool chasing= cursor->chaser; DBUG_ENTER("translog_buffer_next"); - DBUG_PRINT("info", ("horizon (%u,0x%lx), chasing: %d", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, chasing)); + DBUG_PRINT("info", ("horizon (%lu,0x%lx), chasing: %d", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), chasing)); DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0); @@ -1105,8 +1112,9 @@ static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, #endif if (new_file) { - horizon->file_no++; - horizon->rec_offset= TRANSLOG_PAGE_SIZE; /* header page */ + /* move the horizon to the next file and its header page */ + *horizon+= LSN_ONE_FILE; + *horizon= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE); if (!chasing && translog_create_new_file()) { DBUG_RETURN(1); @@ -1278,7 +1286,7 @@ static translog_size_t translog_variable_record_1group_decode_len(uchar **src) 0 - Error */ -uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) +static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) { DBUG_ENTER("translog_get_total_chunk_length"); switch (page[offset] & TRANSLOG_CHUNK_TYPE) { @@ -1380,9 +1388,10 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) ("Buffer #%u 0x%lx: locked by 0x%lx (0x%lx), " "file: %u, offset (%lu,0x%lx), size %lu", (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, my_thread_var, + (ulong) buffer->locked_by, (ulong) my_thread_var, (uint) buffer->file, - (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size)); DBUG_ASSERT(buffer->locked_by == my_thread_var); @@ -1407,7 +1416,7 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) }; if (pagecache_write(log_descriptor.pagecache, &file, - (buffer->offset.rec_offset + i) / TRANSLOG_PAGE_SIZE, + (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE, 3, buffer->buffer + i, PAGECACHE_PLAIN_PAGE, @@ -1416,20 +1425,20 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) { UNRECOVERABLE_ERROR(("Cant't write page (%lu,0x%lx) to pagecacte", (ulong) buffer->file, - (ulong) (buffer->offset.rec_offset + i))); + (ulong) (LSN_OFFSET(buffer->offset)+ i))); } } if (my_pwrite(buffer->file, (char*) buffer->buffer, - buffer->size, buffer->offset.rec_offset, + buffer->size, LSN_OFFSET(buffer->offset), MYF(MY_WME)) != buffer->size) { UNRECOVERABLE_ERROR(("Cant't buffer (%lu,0x%lx) size %lu to the disk (%d)", (ulong) buffer->file, - (ulong) buffer->offset.rec_offset, + (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size, errno)); DBUG_RETURN(1); } - if (buffer->last_lsn.rec_offset != 0) /* if buffer->last_lsn is set */ + if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */ translog_set_sent_to_file(&buffer->last_lsn); /* Free buffer */ buffer->file= 0; @@ -1530,20 +1539,20 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) uint8 flags; uchar *page= (uchar*) page_addr; DBUG_ENTER("translog_page_validator"); - TRANSLOG_ADDRESS *addr= ((TRANSLOG_VALIDATOR_DATA*) data)->addr; + TRANSLOG_ADDRESS addr= *((TRANSLOG_VALIDATOR_DATA*) data)->addr; ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 0; - if (uint3korr(page) != addr->rec_offset / TRANSLOG_PAGE_SIZE || - uint3korr(page + 3) != addr->file_no) + if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE || + uint3korr(page + 3) != LSN_FILE_NO(addr)) { UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " "page address written in the page is incorrect :" "File %lu instead of %lu or page %lu instead of %lu", - (ulong) addr->file_no, (ulong) addr->rec_offset, - (ulong) uint3korr(page + 3), (ulong) addr->file_no, + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr), (ulong) uint3korr(page), - (ulong) addr->rec_offset / TRANSLOG_PAGE_SIZE)); + (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE)); DBUG_RETURN(1); } flags= page[3 + 3]; @@ -1552,7 +1561,7 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) { UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " "Garbage in the page flags field detected : %x", - (ulong) addr->file_no, (ulong) addr->rec_offset, + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), (uint) flags)); DBUG_RETURN(1); } @@ -1565,7 +1574,7 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) { UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " "CRC mismatch: calculated: %lx on the page %lx", - (ulong) addr->file_no, (ulong) addr->rec_offset, + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), (ulong) crc, (ulong) uint4korr(page + 3 + 3 + 1))); DBUG_RETURN(1); } @@ -1584,8 +1593,8 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) */ uint16 test= uint2korr(page + offset); DBUG_PRINT("info", ("sector #%u offset %u current %lx " - "read 0x%lx stored 0x%x%x", - i / 2, offset, current, + "read 0x%x stored 0x%x%x", + i / 2, offset, (ulong) current, (uint) uint2korr(page + offset), (uint) table[i], (uint) table[i + 1])); if (test < current) @@ -1614,8 +1623,8 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) current= test; DBUG_PRINT("info", ("sector #%u offset %u current %lx " - "read 0x%lx stored 0x%x%x", - i / 2, offset, current, + "read 0x%x stored 0x%x%x", + i / 2, offset, (ulong) current, (uint) uint2korr(page + offset), (uint) table[i], (uint) table[i + 1])); } @@ -1623,6 +1632,7 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) DBUG_RETURN(0); } + /* Get log page by file number and offset of the beginning of the page @@ -1637,19 +1647,21 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) NULL - Error */ -uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) +static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) { + TRANSLOG_ADDRESS addr= *(data->addr); uint cache_index; + uint32 file_no= LSN_FILE_NO(addr); DBUG_ENTER("translog_get_page"); DBUG_PRINT("enter", ("File %lu, Offset %lu(0x%lx)", - (ulong) data->addr->file_no, - (ulong) data->addr->rec_offset, - (ulong) data->addr->rec_offset)); + (ulong) file_no, + (ulong) LSN_OFFSET(addr), + (ulong) LSN_OFFSET(addr))); /* it is really page address */ - DBUG_ASSERT(data->addr->rec_offset % TRANSLOG_PAGE_SIZE == 0); + DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0); - if ((cache_index= log_descriptor.horizon.file_no - data->addr->file_no) < + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) < OPENED_FILES_NUM) { PAGECACHE_FILE file; @@ -1657,7 +1669,7 @@ uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) if (log_descriptor.log_file_num[cache_index] == 0) { if ((log_descriptor.log_file_num[cache_index]= - open_logfile_by_number_no_cache(data->addr->file_no)) == 0) + open_logfile_by_number_no_cache(file_no)) == 0) { DBUG_RETURN(NULL); } @@ -1666,7 +1678,7 @@ uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) buffer= (uchar*) pagecache_valid_read(log_descriptor.pagecache, &file, - data->addr->rec_offset / TRANSLOG_PAGE_SIZE, + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, 3, (char*) buffer, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, 0, @@ -1674,9 +1686,9 @@ uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) } else { - File file= open_logfile_by_number_no_cache(data->addr->file_no); + File file= open_logfile_by_number_no_cache(file_no); if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE, - data->addr->rec_offset, MYF(MY_FNABP | MY_WME))) + LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME))) buffer= NULL; else if (translog_page_validator((byte*) buffer, (gptr) data)) buffer= NULL; @@ -1705,25 +1717,28 @@ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, { MY_STAT stat_buff, *stat; char path[FN_REFLEN]; + uint32 rec_offset; + uint32 file_no= LSN_FILE_NO(*addr); DBUG_ENTER("translog_get_last_page_addr"); - if ((stat= my_stat (translog_filename_by_fileno(addr->file_no, + if ((stat= my_stat (translog_filename_by_fileno(file_no, path), &stat_buff, MYF(MY_WME))) == NULL) DBUG_RETURN(1); DBUG_PRINT("info", ("File size %lu", (ulong) stat->st_size)); if (stat->st_size > TRANSLOG_PAGE_SIZE) { - addr->rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * + rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * TRANSLOG_PAGE_SIZE); - *last_page_ok= (stat->st_size == addr->rec_offset + TRANSLOG_PAGE_SIZE); + *last_page_ok= (stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE); } else { *last_page_ok= 0; - addr->rec_offset= 0; + rec_offset= 0; } - DBUG_PRINT("info", ("Last page: 0x%lx, ok %d", (ulong) addr->rec_offset, + *addr= MAKE_LSN(file_no, rec_offset); + DBUG_PRINT("info", ("Last page: 0x%lx, ok %d", (ulong) rec_offset, *last_page_ok)); DBUG_RETURN(0); } @@ -1739,6 +1754,7 @@ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, RETURN 1,3,4,5 - number of bytes to store given length */ + static uint translog_variable_record_length_bytes(translog_size_t length) { if (length < 250) @@ -1764,7 +1780,7 @@ static uint translog_variable_record_length_bytes(translog_size_t length) 0 - Error */ -uint16 translog_get_chunk_header_length(uchar *page, uint16 offset) +static uint16 translog_get_chunk_header_length(uchar *page, uint16 offset) { DBUG_ENTER("translog_get_chunk_header_length"); switch (page[offset] & TRANSLOG_CHUNK_TYPE) { @@ -1921,31 +1937,30 @@ my_bool translog_init(const char *directory, /* TODO: check that last checkpoint within present log addresses space */ /* find the log end */ - if (last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) { - DBUG_ASSERT(last_checkpoint_lsn.rec_offset == 0); + DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); /* there was no checkpoints we will read from the beginning */ - sure_page.file_no= 1; - sure_page.rec_offset= TRANSLOG_PAGE_SIZE; + sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE); } else { sure_page= last_checkpoint_lsn; - DBUG_ASSERT(sure_page.rec_offset % TRANSLOG_PAGE_SIZE != 0); - sure_page.rec_offset-= sure_page.rec_offset % TRANSLOG_PAGE_SIZE; + DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0); + sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE; } - log_descriptor.horizon.file_no= last_page.file_no= last_logno; + log_descriptor.horizon= last_page= MAKE_LSN(last_logno,0); if (translog_get_last_page_addr(&last_page, &pageok)) DBUG_RETURN(1); - if (last_page.rec_offset == 0) + if (LSN_OFFSET(last_page) == 0) { - if (last_page.file_no == 1) + if (LSN_FILE_NO(last_page) == 1) { logs_found= 0; /* file #1 has no pages */ } else { - last_page.file_no--; + last_page-= LSN_ONE_FILE; if (translog_get_last_page_addr(&last_page, &pageok)) DBUG_RETURN(1); } @@ -1956,25 +1971,22 @@ my_bool translog_init(const char *directory, TRANSLOG_ADDRESS current_page= sure_page; my_bool pageok; - DBUG_ASSERT(sure_page.file_no < last_page.file_no || - (sure_page.file_no == last_page.file_no && - sure_page.rec_offset <= last_page.rec_offset)); + DBUG_ASSERT(sure_page <= last_page); /* TODO: check page size */ - last_valid_page.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; - last_valid_page.rec_offset= 0; + last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN; /* scan and validate pages */ do { TRANSLOG_ADDRESS current_file_last_page; - current_file_last_page.file_no= current_page.file_no; + current_file_last_page= current_page; if (translog_get_last_page_addr(¤t_file_last_page, &pageok)) DBUG_RETURN(1); if (!pageok) { - DBUG_PRINT("error", ("File %u have no complete last page", - (uint) current_file_last_page.file_no)); + DBUG_PRINT("error", ("File %lu have no complete last page", + (ulong) LSN_FILE_NO(current_file_last_page))); old_log_was_recovered= 1; /* This file is not written till the end so it should be last */ last_page= current_file_last_page; @@ -1992,36 +2004,36 @@ my_bool translog_init(const char *directory, if (data.was_recovered) { DBUG_PRINT("error", ("file no %u (%d), rec_offset 0x%lx (%lu) (%d)", - (uint) current_page.file_no, - (uint3korr(page + 3) != current_page.file_no), - (ulong) current_page.rec_offset, - (ulong) (current_page.rec_offset / + (uint) LSN_FILE_NO(current_page), + (uint3korr(page + 3) != + LSN_FILE_NO(current_page)), + (ulong) LSN_OFFSET(current_page), + (ulong) (LSN_OFFSET(current_page) / TRANSLOG_PAGE_SIZE), (uint3korr(page) != - current_page.rec_offset / TRANSLOG_PAGE_SIZE))); + LSN_OFFSET(current_page) / + TRANSLOG_PAGE_SIZE))); old_log_was_recovered= 1; break; } last_valid_page= current_page; - current_page.rec_offset+= TRANSLOG_PAGE_SIZE; - } while (current_page.rec_offset <= current_file_last_page.rec_offset); - current_page.file_no++; - current_page.rec_offset= TRANSLOG_PAGE_SIZE; - } while (current_page.file_no <= last_page.file_no && + current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */ + } while (current_page <= current_file_last_page); + current_page+= LSN_ONE_FILE; + current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE); + } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) && !old_log_was_recovered); - if (last_valid_page.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN) { - DBUG_ASSERT(last_valid_page.rec_offset == 0); - /* Panic!!! Even page which should be valid is invalid */ /* TODO: issue error */ DBUG_RETURN(1); } DBUG_PRINT("info", ("Last valid page is in file %lu offset %lu (0x%lx), " "Logs found: %d, was recovered: %d", - (ulong) last_valid_page.file_no, - (ulong) last_valid_page.rec_offset, - (ulong) last_valid_page.rec_offset, + (ulong) LSN_FILE_NO(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), logs_found, old_log_was_recovered)); /* TODO: check server ID */ @@ -2034,7 +2046,8 @@ my_bool translog_init(const char *directory, uchar buffer[TRANSLOG_PAGE_SIZE], *page; uint16 chunk_offset; /* continue old log */ - DBUG_ASSERT(last_valid_page.file_no == log_descriptor.horizon.file_no); + DBUG_ASSERT(LSN_FILE_NO(last_valid_page)== + LSN_FILE_NO(log_descriptor.horizon)); if ((page= translog_get_page(&data, buffer)) == NULL || (chunk_offset= translog_get_first_chunk_offset(page)) == 0) @@ -2064,8 +2077,9 @@ my_bool translog_init(const char *directory, log_descriptor.bc.buffer->size+= chunk_offset; log_descriptor.bc.ptr+= chunk_offset; log_descriptor.bc.current_page_size= chunk_offset; - log_descriptor.horizon.rec_offset= - chunk_offset + last_valid_page.rec_offset; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + (chunk_offset + + LSN_OFFSET(last_valid_page))); DBUG_PRINT("info", ("Move Page #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", (uint) log_descriptor.bc.buffer_no, (ulong) log_descriptor.bc.buffer, @@ -2088,16 +2102,14 @@ my_bool translog_init(const char *directory, if (!logs_found) { /* Start new log system from scratch */ - /* Current log number */ - log_descriptor.horizon.file_no= 1; /* Used space */ - log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; // header page + log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); // header page /* Current logs file number in page cache */ log_descriptor.log_file_num[0]= - open_logfile_by_number_no_cache(log_descriptor.horizon.file_no); + open_logfile_by_number_no_cache(1); if (translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no, + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); /* assign buffer 0 */ @@ -2124,9 +2136,11 @@ my_bool translog_init(const char *directory, } else { - log_descriptor.horizon.file_no++; /* leave the demaged file - untouched */ - log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; /* header page */ + /* leave the demaged file untouched */ + log_descriptor.horizon+= LSN_ONE_FILE; + /* header page */ + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); if (translog_create_new_file()) DBUG_RETURN(1); /* @@ -2140,8 +2154,8 @@ my_bool translog_init(const char *directory, /* all LSNs that are on disk are flushed */ log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon; - log_descriptor.flushed.rec_offset--; - log_descriptor.sent_to_file.rec_offset--; + log_descriptor.flushed--; /* offset decreased */ + log_descriptor.sent_to_file--; /* offset decreased */ DBUG_RETURN(0); } @@ -2162,10 +2176,11 @@ static void translog_buffer_destroy(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_destroy"); DBUG_PRINT("enter", - ("Buffer #%u: 0x%lx, file: %u, offset (%u,0x%lx), size %lu", + ("Buffer #%u: 0x%lx, file: %u, offset (%lu,0x%lx), size %lu", (uint) buffer->buffer_no, (ulong) buffer, (uint) buffer->file, - (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size)); DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0); if (buffer->file) @@ -2300,23 +2315,24 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, if ((cursor->ptr +TRANSLOG_PAGE_SIZE > cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) || - (horizon->rec_offset + TRANSLOG_PAGE_SIZE > - log_descriptor.log_file_max_size)) + (LSN_OFFSET(*horizon) > + log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE)) { DBUG_PRINT("info", ("Switch to next buffer, Buffer Size %lu (%lu) => %d, " "File size %lu max %lu => %d", (ulong) cursor->buffer->size, (ulong) (cursor->ptr -cursor->buffer->buffer), - (cursor->ptr +TRANSLOG_PAGE_SIZE > + (cursor->ptr + TRANSLOG_PAGE_SIZE > cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER), - (ulong) horizon->rec_offset, + (ulong) LSN_OFFSET(*horizon), (ulong) log_descriptor.log_file_max_size, - (horizon->rec_offset + TRANSLOG_PAGE_SIZE > - log_descriptor.log_file_max_size))); + (LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE)))); if (translog_buffer_next(horizon, cursor, - (horizon->rec_offset + - TRANSLOG_PAGE_SIZE) > - log_descriptor.log_file_max_size)) + LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE))) DBUG_RETURN(1); *prev_buffer= buffer; DBUG_PRINT("info", ("Buffer #%u (0x%lu) have to be flushed", @@ -2353,9 +2369,10 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, 1 - Error */ -my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, - struct st_buffer_cursor *cursor, - translog_size_t length, uchar *buffer) +static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + uchar *buffer) { DBUG_ENTER("translog_write_data_on_page"); DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", @@ -2367,7 +2384,7 @@ my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, memmove(cursor->ptr, buffer, length); cursor->ptr+= length; - horizon->rec_offset+= length; + *horizon+= length; /* adds offset */ cursor->current_page_size+= length; if (!cursor->chaser) cursor->buffer->size+= length; @@ -2401,10 +2418,10 @@ my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, 1 - Error */ -my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, - struct st_buffer_cursor *cursor, - translog_size_t length, - struct st_translog_parts *parts) +static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + struct st_translog_parts *parts) { translog_size_t left= length; uint cur= (uint) parts->current; @@ -2457,16 +2474,25 @@ my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, cursor->ptr+= len; } while (left); + DBUG_PRINT("info", ("Horizon (%lu,0x%lx) Length %lu(0x%lx)", + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) length, (ulong) length)); parts->current= cur; - horizon->rec_offset+= length; + *horizon+= length; /* offset increasing */ cursor->current_page_size+= length; if (!cursor->chaser) cursor->buffer->size+= length; - DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx, " - "chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu) " + "Horizon (%lu,0x%lx) buff offset 0x%lx", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr -cursor->buffer->buffer), + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) (LSN_OFFSET(cursor->buffer->offset) + + cursor->buffer->size))); DBUG_ASSERT(cursor->chaser || ((ulong) (cursor->ptr -cursor->buffer->buffer) == cursor->buffer->size)); @@ -2687,15 +2713,14 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) { translog_size_t last_page_offset= log_descriptor.page_overhead + last_page_data; - translog_size_t offset= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size /* next - page - */ + - pages * TRANSLOG_PAGE_SIZE + last_page_offset; + translog_size_t offset= (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_size + + pages * TRANSLOG_PAGE_SIZE + last_page_offset); translog_size_t buffer_end_offset, file_end_offset, min_offset; DBUG_ENTER("translog_advance_pointer"); - DBUG_PRINT("enter", ("Pointer: (%u, 0x%lx) + %u + %u pages + %u + %u", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, + DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), (uint) (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size), pages, (uint) log_descriptor.page_overhead, @@ -2709,7 +2734,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) struct st_translog_buffer *old_buffer; buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size; file_end_offset= - log_descriptor.log_file_max_size - log_descriptor.horizon.rec_offset; + log_descriptor.log_file_max_size - LSN_OFFSET(log_descriptor.horizon); DBUG_PRINT("info", ("offset: %lu, buffer_end_offs: %lu, " "file_end_offs: %lu", (ulong) offset, (ulong) buffer_end_offset, @@ -2719,14 +2744,14 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) (uint) log_descriptor.bc.buffer->buffer_no, (uint) log_descriptor.bc.buffer_no, (ulong) log_descriptor.bc.buffer, - (ulong) log_descriptor.bc.buffer->offset.rec_offset, + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), (ulong) log_descriptor.bc.buffer->size, - (ulong) (log_descriptor.bc.buffer->offset.rec_offset + + (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) + log_descriptor.bc.buffer->size), - (ulong) log_descriptor.horizon.rec_offset)); - DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset + + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + log_descriptor.bc.buffer->size == - log_descriptor.horizon.rec_offset); + LSN_OFFSET(log_descriptor.horizon)); if (offset <= buffer_end_offset && offset <= file_end_offset) break; @@ -2740,7 +2765,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) min_offset= (buffer_end_offset < file_end_offset ? buffer_end_offset : file_end_offset); log_descriptor.bc.buffer->size+= min_offset; - log_descriptor.bc.ptr +=min_offset; + log_descriptor.bc.ptr+= min_offset; DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", (uint) log_descriptor.bc.buffer->buffer_no, (ulong) log_descriptor.bc.buffer, @@ -2757,9 +2782,11 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) if (file_end_offset <= buffer_end_offset) { - log_descriptor.horizon.file_no++; - log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; - DBUG_PRINT("info", ("New file %d", log_descriptor.horizon.file_no)); + log_descriptor.horizon+= LSN_ONE_FILE; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", ("New file %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon))); if (translog_create_new_file()) { DBUG_RETURN(1); @@ -2768,7 +2795,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) else { DBUG_PRINT("info", ("The same file")); - log_descriptor.horizon.rec_offset+= min_offset; + log_descriptor.horizon+= min_offset; /* offset increasing */ } translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); if (translog_buffer_unlock(old_buffer)) @@ -2780,7 +2807,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) log_descriptor.bc.ptr+= offset; log_descriptor.bc.buffer->size+= offset; translog_buffer_increase_writers(log_descriptor.bc.buffer); - log_descriptor.horizon.rec_offset+= offset; + log_descriptor.horizon+= offset; /* offset increasing */ log_descriptor.bc.current_page_size= last_page_offset; DBUG_PRINT("info", ("drop write_counter")); log_descriptor.bc.write_counter= 0; @@ -2801,9 +2828,9 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == log_descriptor.bc.buffer_no); DBUG_PRINT("info", - ("pointer moved to: (%u, 0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset)); + ("pointer moved to: (%lu, 0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer-> buffer) %TRANSLOG_PAGE_SIZE == log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); @@ -2865,18 +2892,18 @@ static translog_size_t translog_get_current_group_size() DBUG_ENTER("translog_get_current_group_size"); - DBUG_PRINT("info", ("buffer_rest in pages %lu", buffer_rest)); + DBUG_PRINT("info", ("buffer_rest in pages %u", buffer_rest)); buffer_rest*= log_descriptor.page_capacity_chunk_2; /* in case of only half of buffer free we can write this and next buffer */ if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2) { - DBUG_PRINT("info", ("buffer_rest %lu -> add %lu", + DBUG_PRINT("info", ("buffer_rest %u -> add %lu", buffer_rest, (ulong) log_descriptor.buffer_capacity_chunk_2)); buffer_rest+= log_descriptor.buffer_capacity_chunk_2; } - DBUG_PRINT("info", ("buffer_rest %lu", buffer_rest)); + DBUG_PRINT("info", ("buffer_rest %u", buffer_rest)); DBUG_RETURN(buffer_rest); } @@ -2984,20 +3011,22 @@ translog_write_variable_record_1group(LSN *lsn, translog_write_parts_on_page(&horizon, &cursor, first_page, parts); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); for (i= 0; i < full_pages; i++) { if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); } if (additional_chunk3_page) @@ -3007,10 +3036,11 @@ translog_write_variable_record_1group(LSN *lsn, page_capacity_chunk_2 - 2, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); DBUG_ASSERT(cursor.current_page_size == TRANSLOG_PAGE_SIZE); } @@ -3018,10 +3048,11 @@ translog_write_variable_record_1group(LSN *lsn, record_rest, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); rc= translog_buffer_lock(cursor.buffer); if (!rc) @@ -3029,7 +3060,6 @@ translog_write_variable_record_1group(LSN *lsn, /* check if we wrote something on lst not full page and need to reconstruct CRC and sector protection - if (buffer->offset.rec_offset + buffer->size - horizon->rec_offset > */ translog_buffer_decrease_writers(cursor.buffer); } @@ -3129,19 +3159,19 @@ translog_write_variable_record_1chunk(LSN *lsn, NULL - error */ -static uchar *translog_put_LSN_diff(LSN *base_lsn, LSN *lsn, uchar *dst) +static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) { DBUG_ENTER("translog_put_LSN_diff"); - DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), val: (0x%lx,0x%lx), dst 0x%lx", - (ulong) base_lsn->file_no, - (ulong) base_lsn->rec_offset, - (ulong) lsn->file_no, - (ulong) lsn->rec_offset, (ulong) dst)); - if (base_lsn->file_no == lsn->file_no) + DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx), val: (0x%lu,0x%lx), dst 0x%lx", + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn), (ulong) dst)); + if (LSN_FILE_NO(base_lsn) == LSN_FILE_NO(lsn)) { uint32 diff; - DBUG_ASSERT(base_lsn->rec_offset > lsn->rec_offset); - diff= base_lsn->rec_offset - lsn->rec_offset; + DBUG_ASSERT(base_lsn > lsn); + diff= base_lsn - lsn; if (diff <= 0x3FFF) { dst-= 2; @@ -3171,16 +3201,16 @@ static uchar *translog_put_LSN_diff(LSN *base_lsn, LSN *lsn, uchar *dst) { uint32 diff; uint32 offset_diff; - ulonglong base_offset= base_lsn->rec_offset; - DBUG_ASSERT(base_lsn->file_no > lsn->file_no); - diff= base_lsn->file_no - lsn->file_no; - if (base_offset < lsn->rec_offset) + ulonglong base_offset= LSN_OFFSET(base_lsn); + DBUG_ASSERT(base_lsn > lsn); + diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn); + if (base_offset < LSN_OFFSET(lsn)) { /* take 1 from file offset */ diff--; base_offset+= 0x100000000LL; } - offset_diff= base_offset - lsn->rec_offset; + offset_diff= base_offset - LSN_OFFSET(lsn); if (diff > 0x3f) { /*TODO: error - too long transaction - panic!!! */ @@ -3222,7 +3252,7 @@ static uchar *translog_put_LSN_diff(LSN *base_lsn, LSN *lsn, uchar *dst) pointer to buffer after decoded LSN */ -static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst) +static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst) { LSN lsn; uint32 diff; @@ -3230,42 +3260,39 @@ static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst) uint8 code; DBUG_ENTER("translog_get_LSN_from_diff"); DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), src: 0x%lx, dst 0x%lx", - (ulong) base_lsn->file_no, - (ulong) base_lsn->rec_offset, (ulong) src, (ulong) dst)); + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) src, (ulong) dst)); first_byte= *((uint8*) src); code= first_byte & 0xC0; first_byte &= 0x3F; switch (code) { case 0x00: - lsn.file_no= base_lsn->file_no; - lsn.rec_offset= - base_lsn->rec_offset - ((first_byte << 8) + *((uint8*) (src + 1))); + lsn= base_lsn - ((first_byte << 8) + *((uint8*) (src + 1))); src+= 2; break; case 0x40: - lsn.file_no= base_lsn->file_no; diff= uint2korr(src + 1); - lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 16) + diff); + lsn= base_lsn - ((first_byte << 16) + diff); src+= 3; break; case 0x80: - lsn.file_no= base_lsn->file_no; diff= uint3korr(src + 1); - lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 24) + diff); + lsn= base_lsn - ((first_byte << 24) + diff); src+= 4; break; case 0xC0: { - ulonglong base_offset= base_lsn->rec_offset; diff= uint4korr(src + 1); - if (diff > base_lsn->rec_offset) + ulonglong base_offset= LSN_OFFSET(base_lsn); + if (diff > LSN_OFFSET(base_lsn)) { /* take 1 from file offset */ first_byte++; base_offset+= 0x100000000LL; } - lsn.file_no= base_lsn->file_no - first_byte; - lsn.rec_offset= base_offset - diff; + lsn= MAKE_LSN(LSN_FILE_NO(base_lsn) - first_byte, + base_offset - diff); src+= 5; break; } @@ -3273,7 +3300,7 @@ static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst) DBUG_ASSERT(0); DBUG_RETURN(NULL); } - lsn7store(dst, &lsn); + lsn7store(dst, lsn); DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst)); DBUG_RETURN(src); } @@ -3295,7 +3322,7 @@ static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst) */ static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, - LSN *base_lsn, + LSN base_lsn, uint lsns, uchar *compressed_LSNs) { struct st_translog_part part; @@ -3345,8 +3372,8 @@ static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, uint i; for (i= 0; i < lsns; i++, ref_ptr-= 7) { - lsn7korr(&ref, ref_ptr); - if ((dst_ptr= translog_put_LSN_diff(base_lsn, &ref, dst_ptr)) == NULL) + ref= lsn7korr(ref_ptr); + if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL) DBUG_RETURN(1); } economy= (dst_ptr - part.buff); @@ -3462,14 +3489,15 @@ translog_write_variable_record_mgroup(LSN *lsn, DBUG_RETURN(1); } - DBUG_PRINT("info", ("chunk #%u first_page: %u (%u), full_pages: %u (%lu), " + DBUG_PRINT("info", ("chunk #%u first_page: %u (%u), full_pages: %lu (%lu), " "Left %lu", groups.elements, first_page, first_page - 1, - full_pages, + (ulong) full_pages, (ulong) full_pages * log_descriptor.page_capacity_chunk_2, - parts->record_length - (first_page - 1 + buffer_rest) - + (ulong)parts->record_length - + (first_page - 1 + buffer_rest) - done)); translog_advance_pointer(full_pages, 0); @@ -3493,12 +3521,12 @@ translog_write_variable_record_mgroup(LSN *lsn, translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " "Left: %lu", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, - (ulong) horizon.rec_offset, + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - (first_page - 1) - done))); @@ -3510,12 +3538,12 @@ translog_write_variable_record_mgroup(LSN *lsn, DBUG_RETURN(1); } - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)" + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)" "Left: %lu", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, - (ulong) horizon.rec_offset, + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - (first_page - 1) - i * log_descriptor.page_capacity_chunk_2 - done))); @@ -3559,8 +3587,7 @@ translog_write_variable_record_mgroup(LSN *lsn, group.addr= horizon= log_descriptor.horizon; cursor= log_descriptor.bc; cursor.chaser= 1; - group.num= 0; /* 0 because it does not matter - */ + group.num= 0; /* 0 because it does not matter */ if (insert_dynamic(&groups, (gptr) &group)) { delete_dynamic(&groups); @@ -3643,12 +3670,12 @@ translog_write_variable_record_mgroup(LSN *lsn, DBUG_PRINT("info", ("chunk 2 to finish first page")); translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " "Left: %lu", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, - (ulong) horizon.rec_offset, + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - (first_page - 1) - done))); } @@ -3661,12 +3688,12 @@ translog_write_variable_record_mgroup(LSN *lsn, int2store(chunk3_header + 1, chunk3_size); translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header); translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts); - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " "Left: %lu", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, - (ulong) horizon.rec_offset, + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - chunk3_size - done))); chunk3_pages= 0; } @@ -3685,12 +3712,12 @@ translog_write_variable_record_mgroup(LSN *lsn, DBUG_RETURN(1); } - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) " + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " "Left: %lu", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, - (ulong) horizon.rec_offset, + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - (first_page - 1) - i * log_descriptor.page_capacity_chunk_2 - done))); @@ -3704,10 +3731,11 @@ translog_write_variable_record_mgroup(LSN *lsn, delete_dynamic(&groups); DBUG_RETURN(1); } - DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset, - (uint) horizon.file_no, (ulong) horizon.rec_offset)); + DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN); @@ -3783,7 +3811,7 @@ translog_write_variable_record_mgroup(LSN *lsn, for (i= curr_group; i < limit + curr_group; i++) { get_dynamic(&groups, (gptr) &group, i); - lsn7store(group_desc, &group.addr); + lsn7store(group_desc, group.addr); group_desc[7]= group.num; translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc); } @@ -3840,9 +3868,9 @@ static my_bool translog_write_variable_record(LSN *lsn, DBUG_ENTER("translog_write_variable_record"); translog_lock(); - DBUG_PRINT("info", ("horizon (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset)); + DBUG_PRINT("info", ("horizon (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size; DBUG_PRINT("info", ("header length %u, page_rest: %u", header_length1, page_rest)); @@ -3871,7 +3899,7 @@ static my_bool translog_write_variable_record(LSN *lsn, */ if (log_record_type_descriptor[type].compresed_LSN > 0) { - if (translog_relative_LSN_encode(parts, &log_descriptor.horizon, + if (translog_relative_LSN_encode(parts, log_descriptor.horizon, log_record_type_descriptor[type]. compresed_LSN, compressed_LSNs)) { @@ -3889,7 +3917,7 @@ static my_bool translog_write_variable_record(LSN *lsn, translog_variable_record_length_bytes(parts->record_length); DBUG_PRINT("info", ("after compressing LSN(s) header length %u, " "record length %lu", - header_length1, parts->record_length)); + header_length1, (ulong)parts->record_length)); } /* TODO: check space on current page for header + few bytes */ @@ -3960,9 +3988,9 @@ static my_bool translog_write_fixed_record(LSN *lsn, log_record_type_descriptor[type].fixed_length)); translog_lock(); - DBUG_PRINT("info", ("horizon (%u,0x%lx)", - (uint) log_descriptor.horizon.file_no, - (ulong) log_descriptor.horizon.rec_offset)); + DBUG_PRINT("info", ("horizon (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); DBUG_PRINT("info", @@ -4002,7 +4030,7 @@ static my_bool translog_write_fixed_record(LSN *lsn, if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH) { DBUG_ASSERT(log_record_type_descriptor[type].compresed_LSN > 0); - if (translog_relative_LSN_encode(parts, lsn, + if (translog_relative_LSN_encode(parts, *lsn, log_record_type_descriptor[type]. compresed_LSN, compressed_LSNs)) { @@ -4185,7 +4213,7 @@ my_bool translog_write_record(LSN *lsn, position in sources after decoded LSN(s) */ -static uchar *translog_relative_LSN_decode(LSN *base_lsn, +static uchar *translog_relative_LSN_decode(LSN base_lsn, uchar *src, uchar *dst, uint lsns) { uint i; @@ -4231,7 +4259,7 @@ translog_size_t translog_fixed_length_header(uchar *page, if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH) { DBUG_ASSERT(lsns > 0); - src= translog_relative_LSN_decode(&buff->lsn, src, dst, lsns); + src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns); lsns*= 7; dst+= lsns; length-= lsns; @@ -4321,7 +4349,7 @@ static my_bool translog_scanner_set_last_page(struct st_translog_scanner_data 0 - OK 1 - Error */ -static my_bool translog_init_scanner(LSN *lsn, +static my_bool translog_init_scanner(LSN lsn, my_bool fixed_horizon, struct st_translog_scanner_data *scanner) { @@ -4331,25 +4359,24 @@ static my_bool translog_init_scanner(LSN *lsn, }; DBUG_ENTER("translog_init_scanner"); - DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)", - (ulong) lsn->file_no, (ulong) lsn->rec_offset)); - DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); - scanner->page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE; + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn)); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; scanner->fixed_horizon= fixed_horizon; translog_scanner_set_horizon(scanner); - DBUG_PRINT("info", ("Horizon: (0x%lx,0x%lx)", - (ulong) scanner->horizon.file_no, - (ulong) scanner->horizon.rec_offset)); + DBUG_PRINT("info", ("Horizon: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon))); /* lsn < horizon */ - DBUG_ASSERT(lsn->file_no < scanner->horizon.file_no || - (lsn->file_no == scanner->horizon.file_no && - lsn->rec_offset < scanner->horizon.rec_offset)); + DBUG_ASSERT(lsn < scanner->horizon)); - scanner->page_addr= *lsn; - scanner->page_addr.rec_offset-= scanner->page_offset; + scanner->page_addr= lsn; + scanner->page_addr-= scanner->page_offset; /*decrease offset */ if (translog_scanner_set_last_page(scanner)) DBUG_RETURN(1); @@ -4376,16 +4403,14 @@ static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner) DBUG_ENTER("translog_scanner_eol"); DBUG_PRINT("enter", ("Horizon: (%lu, 0x%lx), Current: (%lu, 0x%lx+0x%x=0x%lx)", - (ulong) scanner->horizon.file_no, - (ulong) scanner->horizon.rec_offset, - (ulong) scanner->page_addr.file_no, - (ulong) scanner->page_addr.rec_offset, + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), (uint) scanner->page_offset, - (ulong) (scanner->page_addr.rec_offset + scanner->page_offset))); - if (scanner->horizon.file_no > scanner->page_addr.file_no || - (scanner->horizon.file_no == scanner->page_addr.file_no && - scanner->horizon.rec_offset > (scanner->page_addr.rec_offset + - scanner->page_offset))) + (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset))); + if (scanner->horizon > (scanner->page_addr + + scanner->page_offset)) { DBUG_PRINT("info", ("Horizon is not reached")); DBUG_RETURN(0); @@ -4398,14 +4423,10 @@ static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner) translog_scanner_set_horizon(scanner); DBUG_PRINT("info", ("Horizon is re-read, EOL: %d", - scanner->horizon.file_no <= scanner->page_addr.file_no && - (scanner->horizon.file_no != scanner->page_addr.file_no || - scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset + - scanner->page_offset)))); - DBUG_RETURN(scanner->horizon.file_no <= scanner->page_addr.file_no && - (scanner->horizon.file_no != scanner->page_addr.file_no || - scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset + - scanner->page_offset))); + scanner->horizon <= (scanner->page_addr + + scanner->page_offset))); + DBUG_RETURN(scanner->horizon <= (scanner->page_addr + + scanner->page_offset)); } @@ -4443,19 +4464,20 @@ static my_bool translog_scanner_eop(struct st_translog_scanner_data *scanner) static my_bool translog_scanner_eof(struct st_translog_scanner_data *scanner) { DBUG_ENTER("translog_scanner_eof"); - DBUG_ASSERT(scanner->page_addr.file_no == scanner->last_file_page.file_no); + DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) == + LSN_FILE_NO(scanner->last_file_page)); DBUG_PRINT("enter", ("curr Page 0x%lx, last page 0x%lx, " "normal EOF %d", - scanner->page_addr.rec_offset, - scanner->last_file_page.rec_offset, - scanner->page_addr.rec_offset == - scanner->last_file_page.rec_offset)); + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->last_file_page), + LSN_OFFSET(scanner->page_addr) == + LSN_OFFSET(scanner->last_file_page))); /* TODO: detect damaged file EOF, TODO: issue warning if damaged file EOF detected */ - DBUG_RETURN(scanner->page_addr.rec_offset == - scanner->last_file_page.rec_offset); + DBUG_RETURN(scanner->page_addr == + scanner->last_file_page); } @@ -4491,20 +4513,22 @@ static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) if (translog_scanner_eof(scanner)) { DBUG_PRINT("info", ("horizon (%lu,0x%lx) pageaddr (%lu,0x%lx)", - (ulong) scanner->horizon.file_no, - (ulong) scanner->horizon.rec_offset, - (ulong) scanner->page_addr.file_no, - (ulong) scanner->page_addr.rec_offset)); + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr))); /* if it is log end it have to be caught before */ - DBUG_ASSERT(scanner->horizon.file_no > scanner->page_addr.file_no); - scanner->page_addr.file_no++; - scanner->page_addr.rec_offset= TRANSLOG_PAGE_SIZE; + DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) > + LSN_FILE_NO(scanner->page_addr)); + scanner->page_addr+= LSN_ONE_FILE; + scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr, + TRANSLOG_PAGE_SIZE); if (translog_scanner_set_last_page(scanner)) DBUG_RETURN(1); } else { - scanner->page_addr.rec_offset+= TRANSLOG_PAGE_SIZE; + scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */ } { TRANSLOG_VALIDATOR_DATA data= @@ -4614,12 +4638,12 @@ translog_size_t translog_variable_length_header(uchar *page, for (i= 0; i < read; i++, curr++) { DBUG_ASSERT(curr < buff->groups_no); - lsn7korr(&buff->groups[curr].addr, src + i * (7 + 1)); + buff->groups[curr].addr= lsn7korr(src + i * (7 + 1)); buff->groups[curr].num= src[i * (7 + 1) + 7]; - DBUG_PRINT("info", ("group #%u (%u,0x%lx) chunks %u", + DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks %u", curr, - (uint) buff->groups[curr].addr.file_no, - (ulong) buff->groups[curr].addr.rec_offset, + (ulong) LSN_FILE_NO(buff->groups[curr].addr), + (ulong) LSN_OFFSET(buff->groups[curr].addr), (uint) buff->groups[curr].num)); } grp_no-= read; @@ -4628,18 +4652,19 @@ translog_size_t translog_variable_length_header(uchar *page, if (scanner) { buff->chunk0_data_addr= scanner->page_addr; - buff->chunk0_data_addr.rec_offset+= (page_offset + header_to_skip + - i * (7 + 1)); + buff->chunk0_data_addr+= (page_offset + header_to_skip + + i * (7 + 1)); /* offset increased */ } else { buff->chunk0_data_addr= buff->lsn; - buff->chunk0_data_addr.rec_offset+= (header_to_skip + i * (7 + 1)); + /* offset increased */ + buff->chunk0_data_addr+= (header_to_skip + i * (7 + 1)); } buff->chunk0_data_len= chunk_len - 2 - i * (7 + 1); - DBUG_PRINT("info", ("Data address (%u,0x%lx), len: %u", - (uint) buff->chunk0_data_addr.file_no, - (ulong) buff->chunk0_data_addr.rec_offset, + DBUG_PRINT("info", ("Data address (%lu,0x%lx), len: %u", + (ulong) LSN_FILE_NO(buff->chunk0_data_addr), + (ulong) LSN_OFFSET(buff->chunk0_data_addr), buff->chunk0_data_len)); break; } @@ -4647,7 +4672,7 @@ translog_size_t translog_variable_length_header(uchar *page, { DBUG_PRINT("info", ("use internal scanner for header reding")); scanner= &internal_scanner; - translog_init_scanner(&buff->lsn, 1, scanner); + translog_init_scanner(buff->lsn, 1, scanner); } translog_get_next_chunk(scanner); page= scanner->page; @@ -4665,7 +4690,7 @@ translog_size_t translog_variable_length_header(uchar *page, } base_lsn= buff->groups[0].addr; - translog_init_scanner(&base_lsn, 1, scanner); + translog_init_scanner(base_lsn, 1, scanner); /* first group chunk is always chunk type 2 */ page= scanner->page; page_offset= scanner->page_offset; @@ -4676,7 +4701,7 @@ translog_size_t translog_variable_length_header(uchar *page, if (lsns) { uchar *start= src; - src= translog_relative_LSN_decode(&base_lsn, src, dst, lsns); + src= translog_relative_LSN_decode(base_lsn, src, dst, lsns); lsns*= 7; dst+= lsns; length-= lsns; @@ -4730,9 +4755,10 @@ translog_read_record_header_from_buffer(uchar *page, TRANSLOG_CHUNK_FIXED); buff->type= (page[page_offset] & TRANSLOG_REC_TYPE); buff->short_trid= uint2korr(page + page_offset + 1); - DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%u,0x%lx)", + DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%lu,0x%lx)", (uint) buff->type, (uint)buff->short_trid, - buff->lsn.file_no, buff->lsn.rec_offset)); + (ulong) LSN_FILE_NO(buff->lsn), + (ulong) LSN_OFFSET(buff->lsn))); /* Read required bytes from the header and call hook */ switch (log_record_type_descriptor[buff->type].class) { @@ -4773,26 +4799,26 @@ translog_read_record_header_from_buffer(uchar *page, part of the header */ -translog_size_t translog_read_record_header(LSN *lsn, +translog_size_t translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff) { uchar buffer[TRANSLOG_PAGE_SIZE], *page; - translog_size_t page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE; + translog_size_t page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; DBUG_ENTER("translog_read_record_header"); - DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)", - (ulong) lsn->file_no, (ulong) lsn->rec_offset)); - DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn))); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); - buff->lsn= *lsn; + buff->lsn= lsn; buff->groups_no= 0; { - TRANSLOG_ADDRESS addr= *lsn; + TRANSLOG_ADDRESS addr= lsn; TRANSLOG_VALIDATOR_DATA data= { &addr, 0 }; - addr.rec_offset-= page_offset; + addr-= page_offset; /* offset decreasing */ if ((page= translog_get_page(&data, buffer)) == NULL) DBUG_RETURN(0); } @@ -4832,19 +4858,19 @@ translog_read_record_header_scan(struct st_translog_scanner_data my_bool move_scanner) { DBUG_ENTER("translog_read_record_header_scan"); - DBUG_PRINT("enter", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " - "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", - (uint) scanner->page_addr.file_no, - (ulong) scanner->page_addr.rec_offset, - (uint) scanner->horizon.file_no, - (ulong) scanner->horizon.rec_offset, - (uint) scanner->last_file_page.file_no, - (ulong) scanner->last_file_page.rec_offset, + DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " + "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), (uint) scanner->page_offset, (uint) scanner->page_offset, scanner->fixed_horizon)); buff->groups_no= 0; buff->lsn= scanner->page_addr; - buff->lsn.rec_offset+= scanner->page_offset; + buff->lsn+= scanner->page_offset; /* offset increasing */ DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page, scanner->page_offset, buff, @@ -4884,7 +4910,8 @@ translog_read_record_header_scan(struct st_translog_scanner_data number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded part of the header */ -translog_size_t translog_read_next_record_header(LSN *lsn, + +translog_size_t translog_read_next_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff, my_bool fixed_horizon, struct @@ -4894,30 +4921,29 @@ translog_size_t translog_read_next_record_header(LSN *lsn, struct st_translog_scanner_data internal_scanner; uint8 chunk_type; - buff->groups_no= 0; /* to be sure that we will free - it right */ + buff->groups_no= 0; /* to be sure that we will free it right */ DBUG_ENTER("translog_read_next_record_header"); DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner)); if (scanner == NULL) { - DBUG_ASSERT(lsn != NULL); + DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); scanner= &internal_scanner; } if (lsn) { if (translog_init_scanner(lsn, fixed_horizon, scanner)) DBUG_RETURN(0); - DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); } - DBUG_PRINT("info", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " - "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", - (uint) scanner->page_addr.file_no, - (ulong) scanner->page_addr.rec_offset, - (uint) scanner->horizon.file_no, - (ulong) scanner->horizon.rec_offset, - (uint) scanner->last_file_page.file_no, - (ulong) scanner->last_file_page.rec_offset, + DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " + "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), (uint) scanner->page_offset, (uint) scanner->page_offset, scanner->fixed_horizon)); @@ -4934,10 +4960,8 @@ translog_size_t translog_read_next_record_header(LSN *lsn, if (scanner->page[scanner->page_offset] == 0) { /* Last record was read */ - buff->lsn.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO; - buff->lsn.rec_offset= 0; - DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); /* just it is not error - */ + buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); /* just it is not error */ } DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0)); } @@ -4955,6 +4979,7 @@ translog_size_t translog_read_next_record_header(LSN *lsn, 0 - OK 1 - Error */ + static my_bool translog_record_read_next_chunk(struct st_translog_reader_data *data) { @@ -4978,7 +5003,7 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data data->current_group++; data->current_chunk= 0; DBUG_PRINT("info", ("skip to group #%u", data->current_group)); - translog_init_scanner(&data->header.groups[data->current_group].addr, + translog_init_scanner(data->header.groups[data->current_group].addr, 1, &data->scanner); } else @@ -4996,10 +5021,9 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data data->header.chunk0_data_len, data->scanner.page_offset, data->current_group, data->header.groups_no - 1)); DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); - DBUG_ASSERT(data->header.lsn.file_no == data->scanner.page_addr.file_no && - data->header.lsn.rec_offset == - data->scanner.page_addr.rec_offset + data->scanner.page_offset); - translog_init_scanner(&data->header.chunk0_data_addr, 1, &data->scanner); + DBUG_ASSERT(data->header.lsn == + data->scanner.page_addr + data->scanner.page_offset); + translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner); data->chunk_size= data->header.chunk0_data_len; data->body_offset= data->scanner.page_offset; data->current_offset= new_current_offset; @@ -5044,7 +5068,7 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data 1 - Error */ -static my_bool translog_init_reader_data(LSN *lsn, +static my_bool translog_init_reader_data(LSN lsn, struct st_translog_reader_data *data) { DBUG_ENTER("translog_init_reader_data"); @@ -5085,7 +5109,7 @@ static my_bool translog_init_reader_data(LSN *lsn, length of data actually read */ -translog_size_t translog_read_record(LSN *lsn, +translog_size_t translog_read_record(LSN lsn, translog_size_t offset, translog_size_t length, uchar *buffer, @@ -5099,7 +5123,7 @@ translog_size_t translog_read_record(LSN *lsn, if (data == NULL) { - DBUG_ASSERT(lsn != NULL); + DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); data= &internal_data; } if (lsn || @@ -5110,15 +5134,15 @@ translog_size_t translog_read_record(LSN *lsn, DBUG_RETURN(0); } DBUG_PRINT("info", ("Offset %lu, length %lu " - "Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), " - "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d", + "Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " + "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", (ulong) offset, (ulong) length, - (uint) data->scanner.page_addr.file_no, - (ulong) data->scanner.page_addr.rec_offset, - (uint) data->scanner.horizon.file_no, - (ulong) data->scanner.horizon.rec_offset, - (uint) data->scanner.last_file_page.file_no, - (ulong) data->scanner.last_file_page.rec_offset, + (ulong) LSN_FILE_NO(data->scanner.page_addr), + (ulong) LSN_OFFSET(data->scanner.page_addr), + (ulong) LSN_FILE_NO(data->scanner.horizon), + (ulong) LSN_OFFSET(data->scanner.horizon), + (ulong) LSN_FILE_NO(data->scanner.last_file_page), + (ulong) LSN_OFFSET(data->scanner.last_file_page), (uint) data->scanner.page_offset, (uint) data->scanner.page_offset, data->scanner.fixed_horizon)); @@ -5193,7 +5217,7 @@ static void translog_force_current_buffer_to_finish() uint16 current_page_size; new_buff_begunning= log_descriptor.bc.buffer->offset; - new_buff_begunning.rec_offset+= log_descriptor.bc.buffer->size; + new_buff_begunning+= log_descriptor.bc.buffer->size; /* increase offset */ DBUG_ENTER("translog_force_current_buffer_to_finish"); DBUG_PRINT("enter", ("Buffer #%u 0x%lx, " @@ -5203,13 +5227,13 @@ static void translog_force_current_buffer_to_finish() "size %lu (%lu), Pg: %u, left: %u", (uint) log_descriptor.bc.buffer_no, (ulong) log_descriptor.bc.buffer, - (ulong) log_descriptor.bc.buffer->offset.file_no, - (ulong) log_descriptor.bc.buffer->offset.rec_offset, - (ulong) log_descriptor.horizon.file_no, - (ulong) (log_descriptor.horizon.rec_offset - + (ulong) LSN_FILE_NO(log_descriptor.bc.buffer->offset), + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) (LSN_OFFSET(log_descriptor.horizon) - log_descriptor.bc.current_page_size), - (ulong) new_buff_begunning.file_no, - (ulong) new_buff_begunning.rec_offset, + (ulong) LSN_FILE_NO(new_buff_begunning), + (ulong) LSN_OFFSET(new_buff_begunning), (ulong) log_descriptor.bc.buffer->size, (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. buffer->buffer), @@ -5219,11 +5243,11 @@ static void translog_force_current_buffer_to_finish() DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) %TRANSLOG_PAGE_SIZE == log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(log_descriptor.horizon.file_no == - log_descriptor.bc.buffer->offset.file_no); - DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset + + DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == + LSN_FILE_NO(log_descriptor.bc.buffer->offset)); + DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == - log_descriptor.horizon.rec_offset); + LSN_OFFSET(log_descriptor.horizon)); if (left != TRANSLOG_PAGE_SIZE && left != 0) { /* @@ -5232,7 +5256,8 @@ static void translog_force_current_buffer_to_finish() */ DBUG_PRINT("info", ("left %u", (uint) left)); - new_buff_begunning.rec_offset-= log_descriptor.bc.current_page_size; + /* decrease offset */ + new_buff_begunning-= log_descriptor.bc.current_page_size; current_page_size= log_descriptor.bc.current_page_size; bzero(log_descriptor.bc.ptr, left); @@ -5315,7 +5340,7 @@ static void translog_force_current_buffer_to_finish() 1 - Error */ -my_bool translog_flush(LSN *lsn) +my_bool translog_flush(LSN lsn) { LSN old_flushed, sent_to_file; int rc= 0; @@ -5323,8 +5348,9 @@ my_bool translog_flush(LSN *lsn) my_bool full_circle= 0; DBUG_ENTER("translog_flush"); - DBUG_PRINT("enter", ("Flush up to LSN (%u,0x%lx)", - (uint) lsn->file_no, (ulong) lsn->rec_offset)); + DBUG_PRINT("enter", ("Flush up to LSN (%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn))); translog_lock(); old_flushed= log_descriptor.flushed; @@ -5336,18 +5362,18 @@ my_bool translog_flush(LSN *lsn) struct st_translog_buffer *buffer= log_descriptor.bc.buffer; /* we can't flush in future */ - DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *lsn) >= 0); - if (cmp_translog_addr(log_descriptor.flushed, *lsn) >= 0) + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0); + if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0) { - DBUG_PRINT("info", ("already flushed (%u,0x%lx)", - (uint) log_descriptor.flushed.file_no, - (ulong) log_descriptor.flushed.rec_offset)); + DBUG_PRINT("info", ("already flushed (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.flushed), + (ulong) LSN_OFFSET(log_descriptor.flushed))); translog_unlock(); DBUG_RETURN(0); } /* send to the file if it is not sent */ translog_get_sent_to_file(&sent_to_file); - if (cmp_translog_addr(sent_to_file, *lsn) >= 0) + if (cmp_translog_addr(sent_to_file, lsn) >= 0) break; do @@ -5369,7 +5395,7 @@ my_bool translog_flush(LSN *lsn) break; } } while ((buffer_start != buffer_no) && - cmp_translog_addr(log_descriptor.flushed, *lsn) < 0); + cmp_translog_addr(log_descriptor.flushed, lsn) < 0); if (buffer_unlock != NULL) translog_buffer_unlock(buffer_unlock); if (translog_buffer_flush(buffer)) @@ -5382,12 +5408,13 @@ my_bool translog_flush(LSN *lsn) translog_lock(); } - for (i= old_flushed.file_no; i <= lsn->file_no; i++) + for (i= LSN_FILE_NO(old_flushed); i <= LSN_FILE_NO(lsn); i++) { uint cache_index; File file; - if ((cache_index= log_descriptor.horizon.file_no - i) < OPENED_FILES_NUM) + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - i) < + OPENED_FILES_NUM) { /* file in the cache */ if (log_descriptor.log_file_num[cache_index] == 0) diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index f4d939786fc..02c272361a4 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -218,7 +218,7 @@ void translog_destroy(); part of the header */ -translog_size_t translog_read_record_header(LSN *lsn, +translog_size_t translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff); @@ -249,7 +249,7 @@ void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); length of data actually read */ -translog_size_t translog_read_record(LSN *lsn, +translog_size_t translog_read_record(LSN lsn, translog_size_t offset, translog_size_t length, uchar *buffer, @@ -269,7 +269,7 @@ translog_size_t translog_read_record(LSN *lsn, 1 - Error */ -my_bool translog_flush(LSN *lsn); +my_bool translog_flush(LSN lsn); /* @@ -304,7 +304,7 @@ my_bool translog_flush(LSN *lsn); part of the header */ -translog_size_t translog_read_next_record_header(LSN *lsn, +translog_size_t translog_read_next_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff, my_bool fixed_horizon, struct diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 9576d4d734d..472298de07c 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -2,11 +2,7 @@ #define _ma_loghandler_lsn_h /* Transaction log record address (file_no is int24 on the disk) */ -typedef struct st_translog_address -{ - uint32 file_no; - uint32 rec_offset; -} TRANSLOG_ADDRESS; +typedef int64 TRANSLOG_ADDRESS; /* Compare addresses @@ -14,26 +10,39 @@ typedef struct st_translog_address A1 == A2 -> 0 A1 < A2 -> result < 0 */ -#define cmp_translog_addr(A1,A2) \ - ((A1).file_no == (A2).file_no ? \ - ((int64)(A1).rec_offset) - (int64)(A2).rec_offset : \ - ((int64)(A1).file_no - (int64)(A2).file_no)) +#define cmp_translog_addr(A1,A2) ((A1) - (A2)) /* LSN type (address of certain log record chank */ typedef TRANSLOG_ADDRESS LSN; +/* Gets file number part of a LSN/log address */ +#define LSN_FILE_NO(L) ((L) >> 32) + +/* Gets raw file number part of a LSN/log address */ +#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) + +/* Gets record offset of a LSN/log address */ +#define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) + +/* Makes lsn/log address from file number and record offset */ +#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) + +/* checks LSN */ +#define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) + /* Puts LSN into buffer (dst) */ #define lsn7store(dst, lsn) \ do { \ - int3store((dst), (lsn)->file_no); \ - int4store((dst) + 3, (lsn)->rec_offset); \ + int3store((dst), LSN_FILE_NO(lsn)); \ + int4store((dst) + 3, LSN_OFFSET(lsn)); \ } while (0) /* Unpacks LSN from the buffer (P) */ -#define lsn7korr(lsn, P) \ - do { \ - (lsn)->file_no= uint3korr(P); \ - (lsn)->rec_offset= uint4korr((P) + 3); \ - } while (0) +#define lsn7korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3)) + +/* what we need to add to LSN to increase it on one file */ +#define LSN_ONE_FILE ((int64)0x100000000LL) + +#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S)) #endif diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index 92988b088ab..74d9059ca7d 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -30,14 +30,15 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/storage/maria/ma_loghandler.o noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \ mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ - mf_pagecache_single_64k-t \ - mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \ - mf_pagecache_consist_1kHC-t \ - mf_pagecache_consist_64kHC-t \ - mf_pagecache_consist_1kRD-t \ - mf_pagecache_consist_64kRD-t \ - mf_pagecache_consist_1kWR-t \ - mf_pagecache_consist_64kWR-t \ + mf_pagecache_single_64k-t-big \ + mf_pagecache_consist_1k-t-big \ + mf_pagecache_consist_64k-t-big \ + mf_pagecache_consist_1kHC-t-big \ + mf_pagecache_consist_64kHC-t-big \ + mf_pagecache_consist_1kRD-t-big \ + mf_pagecache_consist_64kRD-t-big \ + mf_pagecache_consist_1kWR-t-big \ + mf_pagecache_consist_64kWR-t-big \ ma_test_loghandler-t \ ma_test_loghandler_multigroup-t \ ma_test_loghandler_multithread-t \ @@ -49,34 +50,33 @@ mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) -mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_64k_t_big_SOURCES = $(mf_pagecache_single_src) mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 -mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 +mf_pagecache_single_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 +mf_pagecache_consist_1k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_consist_64k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY -mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_1kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_64kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY -mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS -mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS +mf_pagecache_consist_1kRD_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +mf_pagecache_consist_64kRD_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS -mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS -mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src) -mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS +mf_pagecache_consist_1kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +mf_pagecache_consist_64kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS # the generic lock manager may not be used in the end and lockman1-t crashes, # so we don't build lockman-t and lockman1-t CLEANFILES = maria_control page_cache_test_file_1 \ - maria_log.???????? maria_control -noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t + maria_log.???????? diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 00b3382cc06..9f6a6c9cf56 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -133,10 +133,8 @@ static int delete_file(myf my_flags) static int verify_module_values_match_expected() { RET_ERR_UNLESS(last_logno == expect_logno); - RET_ERR_UNLESS(last_checkpoint_lsn.file_no == - expect_checkpoint_lsn.file_no); - RET_ERR_UNLESS(last_checkpoint_lsn.rec_offset == - expect_checkpoint_lsn.rec_offset); + RET_ERR_UNLESS(last_checkpoint_lsn == + expect_checkpoint_lsn); return 0; } @@ -148,10 +146,8 @@ static int verify_module_values_match_expected() static int verify_module_values_are_impossible() { RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); - RET_ERR_UNLESS(last_checkpoint_lsn.file_no == - CONTROL_FILE_IMPOSSIBLE_FILENO); - RET_ERR_UNLESS(last_checkpoint_lsn.rec_offset == - CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET); + RET_ERR_UNLESS(last_checkpoint_lsn == + CONTROL_FILE_IMPOSSIBLE_LSN); return 0; } @@ -173,9 +169,9 @@ static int create_or_open_file() return 0; } -static int write_file(const LSN *checkpoint_lsn, - uint32 logno, - uint objs_to_write) +static int write_file(const LSN checkpoint_lsn, + uint32 logno, + uint objs_to_write) { RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno, objs_to_write) == 0); @@ -191,8 +187,9 @@ static int test_one_log() RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; expect_logno= 123; - RET_ERR_UNLESS(write_file(NULL, expect_logno, - objs_to_write) == 0); + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, + expect_logno, + objs_to_write) == 0); RET_ERR_UNLESS(close_file() == 0); return 0; } @@ -208,8 +205,8 @@ static int test_five_logs() for (i= 0; i<5; i++) { expect_logno*= 3; - RET_ERR_UNLESS(write_file(NULL, expect_logno, - objs_to_write) == 0); + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno, + objs_to_write) == 0); } RET_ERR_UNLESS(close_file() == 0); return 0; @@ -224,29 +221,29 @@ static int test_3_checkpoints_and_2_logs() */ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - expect_checkpoint_lsn= (LSN){5, 10000}; - RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, - expect_logno, objs_to_write) == 0); + expect_checkpoint_lsn= MAKE_LSN(5, 10000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; expect_logno= 17; - RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, - expect_logno, objs_to_write) == 0); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - expect_checkpoint_lsn= (LSN){17, 20000}; - RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, - expect_logno, objs_to_write) == 0); + expect_checkpoint_lsn= MAKE_LSN(17, 20000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; - expect_checkpoint_lsn= (LSN){17, 45000}; - RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, - expect_logno, objs_to_write) == 0); + expect_checkpoint_lsn= MAKE_LSN(17, 45000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; expect_logno= 19; - RET_ERR_UNLESS(write_file(&expect_checkpoint_lsn, - expect_logno, objs_to_write) == 0); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); RET_ERR_UNLESS(close_file() == 0); return 0; } @@ -274,9 +271,9 @@ static int test_binary_content() RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); i= uint4korr(buffer+5); - RET_ERR_UNLESS(i == last_checkpoint_lsn.file_no); + RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn)); i= uint4korr(buffer+9); - RET_ERR_UNLESS(i == last_checkpoint_lsn.rec_offset); + RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn)); i= uint4korr(buffer+13); RET_ERR_UNLESS(i == last_logno); RET_ERR_UNLESS(close_file() == 0); diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index 1cbfcac504e..afbef150b62 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -1,6 +1,7 @@ #include "../maria_def.h" #include #include +#include #ifndef DBUG_OFF static const char *default_dbug_option; @@ -83,7 +84,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, uchar *buffer, uint skip) { DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2); - if (translog_read_record(&rec->lsn, 0, rec->record_length, buffer, NULL) != + if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) != rec->record_length) return 1; return check_content(buffer + skip, rec->record_length - skip); @@ -103,7 +104,7 @@ int main(int argc, char *argv[]) }; uchar long_buffer[LONG_BUFFER_SIZE * 2 + 7 * 2 + 2]; PAGECACHE pagecache; - LSN lsn, lsn_base, first_lsn, *lsn_ptr; + LSN lsn, lsn_base, first_lsn, lsn_ptr; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; int rc; @@ -172,7 +173,7 @@ int main(int argc, char *argv[]) printf("write %d\n", i); if (i % 2) { - lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff, lsn_base); if (translog_write_record(&lsn, LOGREC_CLR_END, (i % 0xFFFF), NULL, 7, lsn_buff, 0)) @@ -182,7 +183,7 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff, lsn_base); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) rec_len= 12; if (translog_write_record(&lsn, @@ -198,8 +199,8 @@ int main(int argc, char *argv[]) } else { - lsn7store(lsn_buff, &lsn_base); - lsn7store(lsn_buff + 7, &first_lsn); + lsn7store(lsn_buff, lsn_base); + lsn7store(lsn_buff + 7, first_lsn); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, (i % 0xFFFF), NULL, 23, lsn_buff, 0)) @@ -209,8 +210,8 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, &lsn_base); - lsn7store(lsn_buff + 7, &first_lsn); + lsn7store(lsn_buff, lsn_base); + lsn7store(lsn_buff + 7, first_lsn); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) rec_len= 19; if (translog_write_record(&lsn, @@ -246,7 +247,7 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - if (translog_flush(&lsn)) + if (translog_flush(lsn)) { fprintf(stderr, "Can't flush #%lu\n", (ulong) i); translog_destroy(); @@ -282,7 +283,7 @@ int main(int argc, char *argv[]) rc= 1; { - translog_size_t len= translog_read_record_header(&first_lsn, &rec); + translog_size_t len= translog_read_record_header(first_lsn, &rec); if (len == 0) { fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); @@ -291,19 +292,19 @@ int main(int argc, char *argv[]) if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || rec.record_length != 6 || uint4korr(rec.header) != 0 || (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || - first_lsn.file_no != rec.lsn.file_no || - first_lsn.rec_offset != rec.lsn.rec_offset) + first_lsn != rec.lsn) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " - "lsn(0x%lx,0x%lx)\n", + "lsn(%lu,0x%lx)\n", (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], - (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (uint) uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } lsn= first_lsn; - lsn_ptr= &first_lsn; + lsn_ptr= first_lsn; for (i= 1;; i++) { if (i % 1000 == 0) @@ -315,7 +316,7 @@ int main(int argc, char *argv[]) i, errno); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { if (i != ITERATIONS) { @@ -325,37 +326,35 @@ int main(int argc, char *argv[]) } break; } - lsn_ptr= NULL; /* use scanner after its - initialization */ + /* use scanner after its initialization */ + lsn_ptr= 0; if (i % 2) { LSN ref; - lsn7korr(&ref, rec.header); + ref= lsn7korr(rec.header); if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || - rec.record_length != 7 || ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset) + rec.record_length != 7 || ref != lsn) { - fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" - "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n", + fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d) " + "type %u, strid %u, len %u, ref(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - (uint) ref.file_no, (ulong) ref.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } } else { LSN ref1, ref2; - lsn7korr(&ref1, rec.header); - lsn7korr(&ref2, rec.header + 7); + ref1= lsn7korr(rec.header); + ref2= lsn7korr(rec.header + 7); if (rec.type !=LOGREC_UNDO_ROW_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != 23 || - ref1.file_no != lsn.file_no || - ref1.rec_offset != lsn.rec_offset || - ref2.file_no != first_lsn.file_no || - ref2.rec_offset != first_lsn.rec_offset || + ref1 != lsn || + ref2 != first_lsn || rec.header[22] != 0x55 || rec.header[21] != 0xAA || rec.header[20] != 0x55 || rec.header[19] != 0xAA || rec.header[18] != 0x55 || rec.header[17] != 0xAA || @@ -363,19 +362,19 @@ int main(int argc, char *argv[]) rec.header[14] != 0x55) { fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" - "type %u, strid %u, len %u, ref1(%u,0x%lx), " - "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x " - "lsn(%u,0x%lx)\n", + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - (uint) ref1.file_no, (ulong) ref1.rec_offset, - (uint) ref2.file_no, (ulong) ref2.rec_offset, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), (uint) rec.header[14], (uint) rec.header[15], (uint) rec.header[16], (uint) rec.header[17], (uint) rec.header[18], (uint) rec.header[19], (uint) rec.header[20], (uint) rec.header[21], (uint) rec.header[22], - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } } @@ -386,7 +385,7 @@ int main(int argc, char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -395,20 +394,19 @@ int main(int argc, char *argv[]) if (i % 2) { LSN ref; - lsn7korr(&ref, rec.header); + ref= lsn7korr(rec.header); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) rec_len= 12; if (rec.type !=LOGREC_UNDO_KEY_INSERT || rec.short_trid != (i % 0xFFFF) || rec.record_length != rec_len + 7 || - len != 12 || ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset || + len != 12 || ref != lsn || check_content(rec.header + 7, len - 7)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " "hdr len: %u (%d), " - "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n", + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", i, (uint) rec.type, rec.type !=LOGREC_UNDO_KEY_INSERT, (uint) rec.short_trid, @@ -417,10 +415,9 @@ int main(int argc, char *argv[]) rec.record_length != rec_len + 7, (uint) len, len != 12, - (uint) ref.file_no, (ulong) ref.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, - (len != 12 || ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset), + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (len != 12 || ref != lsn), check_content(rec.header + 7, len - 7)); goto err; } @@ -428,46 +425,44 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } } else { LSN ref1, ref2; - lsn7korr(&ref1, rec.header); - lsn7korr(&ref2, rec.header + 7); + ref1= lsn7korr(rec.header); + ref2= lsn7korr(rec.header + 7); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) rec_len= 19; if (rec.type !=LOGREC_UNDO_KEY_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != rec_len + 14 || len != 19 || - ref1.file_no != lsn.file_no || - ref1.rec_offset != lsn.rec_offset || - ref2.file_no != first_lsn.file_no || - ref2.rec_offset != first_lsn.rec_offset || + ref1 != lsn || + ref2 != first_lsn || check_content(rec.header + 14, len - 14)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " - "ref1(%u,0x%lx), ref2(%u,0x%lx), " - "lsn(%u,0x%lx)\n", + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (ulong) rec.record_length, (ulong) rec_len, (uint) len, - (uint) ref1.file_no, (ulong) ref1.rec_offset, - (uint) ref2.file_no, (ulong) ref2.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } if (read_and_check_content(&rec, long_buffer, 14)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } } @@ -479,7 +474,7 @@ int main(int argc, char *argv[]) i, errno); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -492,12 +487,12 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " - "lsn(%u,0x%lx)\n", + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - uint4korr(rec.header), (uint) rec.header[4], + (uint) uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } @@ -513,18 +508,19 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" "type %u, strid %u, len %lu != %lu, hdr len: %u, " - "lsn(%u,0x%lx)\n", + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (ulong) rec.record_length, (ulong) rec_len, - (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } if (read_and_check_content(&rec, long_buffer, 0)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } } diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index abb12faa015..1d5ad9c81d8 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -1,6 +1,7 @@ #include "../maria_def.h" #include #include +#include #ifndef DBUG_OFF static const char *default_dbug_option; @@ -84,7 +85,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, translog_size_t len; DBUG_ENTER("read_and_check_content"); DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); - if ((len= translog_read_record(&rec->lsn, 0, rec->record_length, + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL)) != rec->record_length) { fprintf(stderr, "Requested %lu byte, read %lu\n", @@ -121,7 +122,7 @@ int main(int argc, char *argv[]) }; uchar *long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); PAGECACHE pagecache; - LSN lsn, lsn_base, first_lsn, *lsn_ptr; + LSN lsn, lsn_base, first_lsn, lsn_ptr; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; int rc; @@ -194,7 +195,7 @@ int main(int argc, char *argv[]) printf("write %d\n", i); if (i % 2) { - lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff, lsn_base); if (translog_write_record(&lsn, LOGREC_CLR_END, (i % 0xFFFF), NULL, 7, lsn_buff, 0)) @@ -204,7 +205,7 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, &lsn_base); + lsn7store(lsn_buff, lsn_base); rec_len= get_len(); if (translog_write_record(&lsn, LOGREC_UNDO_KEY_INSERT, @@ -219,8 +220,8 @@ int main(int argc, char *argv[]) } else { - lsn7store(lsn_buff, &lsn_base); - lsn7store(lsn_buff + 7, &first_lsn); + lsn7store(lsn_buff, lsn_base); + lsn7store(lsn_buff + 7, first_lsn); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, (i % 0xFFFF), NULL, 23, lsn_buff, 0)) @@ -230,8 +231,8 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, &lsn_base); - lsn7store(lsn_buff + 7, &first_lsn); + lsn7store(lsn_buff, lsn_base); + lsn7store(lsn_buff + 7, first_lsn); rec_len= get_len(); if (translog_write_record(&lsn, LOGREC_UNDO_KEY_DELETE, @@ -294,7 +295,7 @@ int main(int argc, char *argv[]) rc= 1; { - translog_size_t len= translog_read_record_header(&first_lsn, &rec); + translog_size_t len= translog_read_record_header(first_lsn, &rec); if (len == 0) { fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); @@ -304,21 +305,21 @@ int main(int argc, char *argv[]) if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || rec.record_length != 6 || uint4korr(rec.header) != 0 || (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || - first_lsn.file_no != rec.lsn.file_no || - first_lsn.rec_offset != rec.lsn.rec_offset) + first_lsn != rec.lsn) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " - "lsn(0x%lx,0x%lx)\n", + "lsn(0x%lu,0x%lx)\n", (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], - (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (uint)uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } translog_free_record_header(&rec); lsn= first_lsn; - lsn_ptr= &first_lsn; + lsn_ptr= first_lsn; for (i= 1;; i++) { if (i % SHOW_DIVIDER == 0) @@ -331,7 +332,7 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { if (i != ITERATIONS) { @@ -342,23 +343,22 @@ int main(int argc, char *argv[]) } break; } - lsn_ptr= NULL; /* use scanner after its - initialization */ + /* use scanner after its initialization */ + lsn_ptr= 0; if (i % 2) { LSN ref; - lsn7korr(&ref, rec.header); - if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || - rec.record_length != 7 || ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset) + ref= lsn7korr(rec.header); + if (rec.type != LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || + rec.record_length != 7 || ref != lsn) { fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" - "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n", + "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - (uint) ref.file_no, (ulong) ref.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -366,15 +366,13 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - lsn7korr(&ref1, rec.header); - lsn7korr(&ref2, rec.header + 7); + ref1= lsn7korr(rec.header); + ref2= lsn7korr(rec.header + 7); if (rec.type !=LOGREC_UNDO_ROW_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != 23 || - ref1.file_no != lsn.file_no || - ref1.rec_offset != lsn.rec_offset || - ref2.file_no != first_lsn.file_no || - ref2.rec_offset != first_lsn.rec_offset || + ref1 != lsn || + ref2 != first_lsn || rec.header[22] != 0x55 || rec.header[21] != 0xAA || rec.header[20] != 0x55 || rec.header[19] != 0xAA || rec.header[18] != 0x55 || rec.header[17] != 0xAA || @@ -382,19 +380,19 @@ int main(int argc, char *argv[]) rec.header[14] != 0x55) { fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" - "type %u, strid %u, len %u, ref1(%u,0x%lx), " - "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x " - "lsn(%u,0x%lx)\n", + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - (uint) ref1.file_no, (ulong) ref1.rec_offset, - (uint) ref2.file_no, (ulong) ref2.rec_offset, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), (uint) rec.header[14], (uint) rec.header[15], (uint) rec.header[16], (uint) rec.header[17], (uint) rec.header[18], (uint) rec.header[19], (uint) rec.header[20], (uint) rec.header[21], (uint) rec.header[22], - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -408,7 +406,7 @@ int main(int argc, char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -417,19 +415,18 @@ int main(int argc, char *argv[]) if (i % 2) { LSN ref; - lsn7korr(&ref, rec.header); + ref= lsn7korr(rec.header); rec_len= get_len(); if (rec.type !=LOGREC_UNDO_KEY_INSERT || rec.short_trid != (i % 0xFFFF) || rec.record_length != rec_len + 7 || - len != 12 || ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset || + len != 12 || ref != lsn || check_content(rec.header + 7, len - 7)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " "hdr len: %u (%d), " - "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n", + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", i, (uint) rec.type, rec.type !=LOGREC_UNDO_KEY_INSERT, (uint) rec.short_trid, @@ -438,10 +435,9 @@ int main(int argc, char *argv[]) rec.record_length != rec_len + 7, (uint) len, len != 12, - (uint) ref.file_no, (ulong) ref.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, - (ref.file_no != lsn.file_no || - ref.rec_offset != lsn.rec_offset), + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (ref != lsn), check_content(rec.header + 7, len - 7)); translog_free_record_header(&rec); goto err; @@ -450,8 +446,8 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -459,29 +455,27 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - lsn7korr(&ref1, rec.header); - lsn7korr(&ref2, rec.header + 7); + ref1= lsn7korr(rec.header); + ref2= lsn7korr(rec.header + 7); rec_len= get_len(); if (rec.type !=LOGREC_UNDO_KEY_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != rec_len + 14 || len != 19 || - ref1.file_no != lsn.file_no || - ref1.rec_offset != lsn.rec_offset || - ref2.file_no != first_lsn.file_no || - ref2.rec_offset != first_lsn.rec_offset || + ref1 != lsn || + ref2 != first_lsn || check_content(rec.header + 14, len - 14)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " - "ref1(%u,0x%lx), ref2(%u,0x%lx), " - "lsn(%u,0x%lx)\n", + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (ulong) rec.record_length, (ulong) rec_len, (uint) len, - (uint) ref1.file_no, (ulong) ref1.rec_offset, - (uint) ref2.file_no, (ulong) ref2.rec_offset, - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -489,8 +483,8 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -505,7 +499,7 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -519,12 +513,12 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " - "lsn(%u,0x%lx)\n", + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, - uint4korr(rec.header), (uint) rec.header[4], + (uint)uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5], - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -541,10 +535,11 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" "type %u, strid %u, len %lu != %lu, hdr len: %u, " - "lsn(%u,0x%lx)\n", + "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (ulong) rec.record_length, (ulong) rec_len, - (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } @@ -552,8 +547,8 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index 794dc6dd255..ed5479026ef 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -1,6 +1,7 @@ #include "../maria_def.h" #include #include +#include #ifndef DBUG_OFF static const char *default_dbug_option; @@ -102,7 +103,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, translog_size_t len; DBUG_ENTER("read_and_check_content"); DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); - if ((len= translog_read_record(&rec->lsn, 0, rec->record_length, + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL)) != rec->record_length) { fprintf(stderr, "Requested %lu byte, read %lu\n", @@ -149,16 +150,17 @@ void writer(int num) DBUG_PRINT("info", ("thread: %u, iteration: %u, len: %lu, " "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)", num, i, (ulong) lens[num][i], - (ulong) lsns1[num][i].file_no, - (ulong) lsns1[num][i].rec_offset, - (ulong) lsns2[num][i].file_no, - (ulong) lsns2[num][i].rec_offset)); + (ulong) LSN_FILE_NO(lsns1[num][i]), + (ulong) LSN_OFFSET(lsns1[num][i]), + (ulong) LSN_FILE_NO(lsns2[num][i]), + (ulong) LSN_OFFSET(lsns2[num][i]))); printf("thread: %u, iteration: %u, len: %lu, " "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)\n", num, i, (ulong) lens[num][i], - (ulong) lsns1[num][i].file_no, - (ulong) lsns1[num][i].rec_offset, - (ulong) lsns2[num][i].file_no, (ulong) lsns2[num][i].rec_offset); + (ulong) LSN_FILE_NO(lsns1[num][i]), + (ulong) LSN_OFFSET(lsns1[num][i]), + (ulong) LSN_FILE_NO(lsns2[num][i]), + (ulong) LSN_OFFSET(lsns2[num][i])); } DBUG_VOID_RETURN; } @@ -191,7 +193,7 @@ int main(int argc, char **argv __attribute__ ((unused))) uint32 i; uint pagen; PAGECACHE pagecache; - LSN first_lsn, *lsn_ptr; + LSN first_lsn, lsn_ptr; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; pthread_t tid; @@ -344,20 +346,18 @@ int main(int argc, char **argv __attribute__ ((unused))) /* Find last LSN and flush up to it (all our log) */ { - LSN max= - { - 0, 0 - }; + LSN max= 0; for (i= 0; i < WRITERS; i++) { if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0) max= lsns2[i][ITERATIONS - 1]; } DBUG_PRINT("info", ("first lsn: (%lu,0x%lx), max lsn: (%lu,0x%lx)", - (ulong) first_lsn.file_no, - (ulong) first_lsn.rec_offset, - (ulong) max.file_no, (ulong) max.rec_offset)); - translog_flush(&max); + (ulong) LSN_FILE_NO(first_lsn), + (ulong) LSN_OFFSET(first_lsn), + (ulong) LSN_FILE_NO(max), + (ulong) LSN_OFFSET(max))); + translog_flush(max); } rc= 1; @@ -369,11 +369,11 @@ int main(int argc, char **argv __attribute__ ((unused))) bzero(indeces, sizeof(indeces)); - lsn_ptr= &first_lsn; + lsn_ptr= first_lsn; for (i= 0;; i++) { len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); - lsn_ptr= NULL; + lsn_ptr= 0; if (len == 0) { @@ -382,7 +382,7 @@ int main(int argc, char **argv __attribute__ ((unused))) translog_free_record_header(&rec); goto err; } - if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) { if (i != WRITERS * ITERATIONS * 2) { @@ -412,9 +412,10 @@ int main(int argc, char **argv __attribute__ ((unused))) (uint) rec.short_trid, (uint) uint2korr(rec.header), (uint) rec.record_length, (uint) index, (uint) uint4korr(rec.header + 2), - (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, - (ulong) lsns1[rec.short_trid][index].file_no, - (ulong) lsns1[rec.short_trid][index].rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns1[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns1[rec.short_trid][index])); translog_free_record_header(&rec); goto err; } @@ -437,9 +438,10 @@ int main(int argc, char **argv __attribute__ ((unused))) (uint) len, (ulong) rec.record_length, lens[rec.short_trid][index], (rec.record_length != lens[rec.short_trid][index]), - (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset, - (ulong) lsns2[rec.short_trid][index].file_no, - (ulong) lsns2[rec.short_trid][index].rec_offset); + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns2[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns2[rec.short_trid][index])); translog_free_record_header(&rec); goto err; } @@ -447,8 +449,9 @@ int main(int argc, char **argv __attribute__ ((unused))) { fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read " - "lsn(%u,0x%lx)\n", - (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset); + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn)); translog_free_record_header(&rec); goto err; } diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index 6771b5f888d..f215805d829 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -1,6 +1,7 @@ #include "../maria_def.h" #include #include +#include #ifndef DBUG_OFF static const char *default_dbug_option; @@ -106,7 +107,7 @@ int main(int argc, char *argv[]) bzero(page, PCACHE_PAGE); #define PAGE_LSN_OFFSET 0 - lsn7store(page + PAGE_LSN_OFFSET, &lsn); + lsn7store(page + PAGE_LSN_OFFSET, lsn); pagecache_write(&pagecache, &file1, 0, 3, (char*)page, PAGECACHE_LSN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, diff --git a/storage/maria/unittest/mf_pagecache_consist.c b/storage/maria/unittest/mf_pagecache_consist.c index 14105cb85af..8ea0094762c 100755 --- a/storage/maria/unittest/mf_pagecache_consist.c +++ b/storage/maria/unittest/mf_pagecache_consist.c @@ -8,6 +8,7 @@ #include #include #include "test_file.h" +#include #define PCACHE_SIZE (PAGE_SIZE*1024*8) diff --git a/storage/maria/unittest/mf_pagecache_single.c b/storage/maria/unittest/mf_pagecache_single.c index 6b1d57f5f91..91cceee618d 100644 --- a/storage/maria/unittest/mf_pagecache_single.c +++ b/storage/maria/unittest/mf_pagecache_single.c @@ -7,6 +7,7 @@ #include #include #include "test_file.h" +#include #define PCACHE_SIZE (PAGE_SIZE*1024*10) @@ -235,7 +236,7 @@ int simple_pin_test() 0, PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN, - 0, 0); + 0); if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) { diag("Got error in flush_pagecache_blocks\n"); @@ -364,7 +365,7 @@ int simple_big_test() 0); } desc[i].length= 0; - desc[i].content= NULL; + desc[i].content= '\0'; ok(1, "Simple big file write"); /* check written pages sequentally read */ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) From 3bc8f629dd35d832cbee14a26c187cb76e78bf6d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 19 Feb 2007 23:01:27 +0200 Subject: [PATCH 89/95] =?UTF-8?q?Postreview=20changes.=20Fixed=20befaviour?= =?UTF-8?q?=20when=20loghandler=20flags=20changed=20from=20one=20run=20to?= =?UTF-8?q?=20another=20one.=20Description=20of=20maria=20transaction=20lo?= =?UTF-8?q?g=20and=20control=20file=20added=20to=20the=20file=20command?= =?UTF-8?q?=C3=8Ds=20magic=20number=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mysys/mf_pagecache.c: postreview changes storage/maria/ma_control_file.c: Postreview changes. storage/maria/ma_control_file.h: Postreview changes. storage/maria/ma_loghandler.c: Postreview changes. Fixed befaviour when loghandler flags changed from one run to another one. storage/maria/ma_loghandler.h: Postreview changes. Functions comment left only near the function body. storage/maria/ma_loghandler_lsn.h: Postreview changes. storage/maria/unittest/ma_test_loghandler-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_multithread-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Postreview changes. support-files/magic: Description of maria transaction log and control file added to the file commandÕs magic number file. --- mysys/mf_pagecache.c | 2 +- storage/maria/ma_control_file.c | 11 +- storage/maria/ma_control_file.h | 25 +- storage/maria/ma_loghandler.c | 2216 ++++++++--------- storage/maria/ma_loghandler.h | 252 +- storage/maria/ma_loghandler_lsn.h | 7 +- storage/maria/unittest/ma_test_loghandler-t.c | 100 +- .../ma_test_loghandler_multigroup-t.c | 109 +- .../ma_test_loghandler_multithread-t.c | 23 +- .../unittest/ma_test_loghandler_pagecache-t.c | 2 +- support-files/magic | 7 + 11 files changed, 1267 insertions(+), 1487 deletions(-) diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 9ef3d5841b5..1b9d48c80e6 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -570,7 +570,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ #define PAGE_LSN_OFFSET 0 - lsn= lsn7korr(buffer + PAGE_LSN_OFFSET); + lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); /* check CONTROL_FILE_IMPOSSIBLE_FILENO & CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 3f9af34b2f1..07eddb956a2 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -21,7 +21,6 @@ */ #include "maria_def.h" -#include "ma_control_file.h" /* Here is the implementation of this module */ @@ -31,13 +30,13 @@ */ /* total size should be < sector size for atomic write operation */ -#define CONTROL_FILE_MAGIC_STRING "MACF" +#define CONTROL_FILE_MAGIC_STRING "\xfe\xfe\xc\1MACF" #define CONTROL_FILE_MAGIC_STRING_OFFSET 0 #define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1) #define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) #define CONTROL_FILE_CHECKSUM_SIZE 1 #define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE) -#define CONTROL_FILE_LSN_SIZE (3+4) +#define CONTROL_FILE_LSN_SIZE LSN_STORE_SIZE #define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) #define CONTROL_FILE_FILENO_SIZE 4 #define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) @@ -200,7 +199,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() error= CONTROL_FILE_BAD_CHECKSUM; goto err; } - last_checkpoint_lsn= lsn7korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET); last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); DBUG_RETURN(0); @@ -261,9 +260,9 @@ int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, DBUG_ASSERT(0); if (update_checkpoint_lsn) - lsn7store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); else /* store old value == change nothing */ - lsn7store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn); + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn); if (update_logno) int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno); diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 8e5dafac24c..616babc3bb2 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -19,9 +19,6 @@ First version written by Guilhem Bichot on 2006-04-27. */ -#ifndef _ma_control_file_h -#define _ma_control_file_h - #define CONTROL_FILE_BASE_NAME "maria_control" /* indicate absence of the log file number; first log is always number 1, 0 is @@ -47,11 +44,6 @@ extern LSN last_checkpoint_lsn; */ extern uint32 last_logno; -/* - Looks for the control file. If absent, it's a fresh start, create file. - If present, read it to find out last checkpoint's LSN and last log. - Called at engine's start. -*/ typedef enum enum_control_file_error { CONTROL_FILE_OK= 0, CONTROL_FILE_TOO_SMALL, @@ -60,21 +52,26 @@ typedef enum enum_control_file_error { CONTROL_FILE_BAD_CHECKSUM, CONTROL_FILE_UNKNOWN_ERROR /* any other error */ } CONTROL_FILE_ERROR; -CONTROL_FILE_ERROR ma_control_file_create_or_open(); +#define CONTROL_FILE_UPDATE_ALL 0 +#define CONTROL_FILE_UPDATE_ONLY_LSN 1 +#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 + + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +CONTROL_FILE_ERROR ma_control_file_create_or_open(); /* Write information durably to the control file. Called when we have created a new log (after syncing this log's creation) and when we have written a checkpoint (after syncing this log record). */ -#define CONTROL_FILE_UPDATE_ALL 0 -#define CONTROL_FILE_UPDATE_ONLY_LSN 1 -#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, uint objs_to_write); /* Free resources taken by control file subsystem */ int ma_control_file_end(); - -#endif diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 01b4c68f12f..dd12c73770b 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -1,7 +1,6 @@ #include "maria_def.h" -#include -/* number of opened log files in the pagecache (should be at lesst 2) */ +/* number of opened log files in the pagecache (should be at least 2) */ #define OPENED_FILES_NUM 3 /* records buffer size (should be LOG_PAGE_SIZE * n) */ @@ -14,16 +13,18 @@ Should be at least 4, because one thread can block up to 2 buffers in normal circumstances (less then half of one and full other, or just switched one and other), But if we met end of the file in the middle and - have to switch buffer it will be 3. + 1 or 2 buffer for flushing/writing. + have to switch buffer it will be 3. + 1 buffer for flushing/writing. + We have a bigger number here for higher concurrency. */ #define TRANSLOG_BUFFERS_NO 5 -/* number of bytes which is worth to be left on first page */ +/* number of bytes (+ header) which can be unused on first page in sequence */ #define TRANSLOG_MINCHUNK_CONTENT 1 -/* length of transaction log file name maria_log.XXXXXXXX*/ -#define TRANSLOG_FILE_NAME_LENGTH 18 /* version of log file */ -#define TRANSLOG_VERSION_ID 10000 +#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */ +#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */ + +/* QQ: For temporary debugging */ #define UNRECOVERABLE_ERROR(E) \ do { \ DBUG_PRINT("error", E); \ @@ -32,13 +33,15 @@ } while(0); + /* record part descriptor */ struct st_translog_part { translog_size_t len; - uchar *buff; + byte *buff; }; + /* record parts descriptor */ struct st_translog_parts { @@ -62,39 +65,39 @@ struct st_translog_buffer How much written (or will be written when copy_to_buffer_in_progress become 0) to this buffer */ - uint32 size; - /* This Buffer File */ + translog_size_t size; + /* File handler for this buffer */ File file; /* Threads which are waiting for buffer filling/freeing */ WQUEUE waiting_filling_buffer; /* Number of record which are in copy progress */ - int16 copy_to_buffer_in_progress; + uint copy_to_buffer_in_progress; /* list of waiting buffer ready threads */ struct st_my_thread_var *waiting_flush; - /* lock for the buffer. Current buffer also lock the handler */ - pthread_mutex_t mutex; struct st_translog_buffer *overlay; #ifndef DBUG_OFF - struct st_my_thread_var *locked_by; - uint8 buffer_no; + uint buffer_no; #endif + /* lock for the buffer. Current buffer also lock the handler */ + pthread_mutex_t mutex; /* IO cache for current log */ - uchar buffer[TRANSLOG_WRITE_BUFFER]; + byte buffer[TRANSLOG_WRITE_BUFFER]; }; struct st_buffer_cursor { /* pointer on the buffer */ - uchar *ptr; + byte *ptr; + /* current buffer */ + struct st_translog_buffer *buffer; /* current page fill */ - uint16 current_page_size; + uint16 current_page_fill; /* how many times we finish this page to write it */ uint16 write_counter; /* previous write offset */ uint16 previous_offset; - /* current buffer and its number */ - struct st_translog_buffer *buffer; + /* Number of current buffer */ uint8 buffer_no; my_bool chaser, protected; }; @@ -104,30 +107,31 @@ struct st_translog_descriptor { /* *** Parameters of the log handler *** */ - /* Directory to store files */ - char directory[FN_REFLEN]; + /* Page cache for the log reads */ + PAGECACHE *pagecache; + /* Flags */ + uint flags; /* max size of one log size (for new logs creation) */ uint32 log_file_max_size; /* server version */ uint32 server_version; /* server ID */ uint32 server_id; - /* Page cache for the log reads */ - PAGECACHE *pagecache; - /* Flags */ - uint flags; - /* Page overhead calculated by flags */ - uint16 page_overhead; - /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */ - uint16 page_capacity_chunk_2; /* Loghandler's buffer capacity in case of chunk 2 filling */ uint32 buffer_capacity_chunk_2; /* Half of the buffer capacity in case of chunk 2 filling */ uint32 half_buffer_capacity_chunk_2; + /* Page overhead calculated by flags */ + uint16 page_overhead; + /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */ + uint16 page_capacity_chunk_2; + /* Directory to store files */ + char directory[FN_REFLEN]; /* *** Current state of the log handler *** */ /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */ File log_file_num[OPENED_FILES_NUM]; + File directory_fd; /* buffers for log writing */ struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO]; /* @@ -142,12 +146,12 @@ struct st_translog_descriptor LSN flushed; LSN sent_to_file; pthread_mutex_t sent_to_file_lock; - File directory_fd; }; static struct st_translog_descriptor log_descriptor; -static uchar end_of_log= 0; +/* Marker for end of log */ +static byte end_of_log= 0; /* record classes */ enum record_class @@ -159,21 +163,16 @@ enum record_class }; /* chunk types */ -#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head - or tail */ -#define TRANSLOG_CHUNK_FIXED 0x40 /* 1 (pseudo)fixed record (also - LSN) */ -#define TRANSLOG_CHUNK_NOHDR 0x80 /* 2 no header chunk (till page - end) */ -#define TRANSLOG_CHUNK_LNGTH 0xC0 /* 3 chunk with chunk length */ -#define TRANSLOG_CHUNK_TYPE 0xC0 /* Mask to get chunk type */ +#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */ +#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */ +#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */ +#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */ +#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */ #define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */ /* compressed (relative) LSN constants */ -#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN - length */ -#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed - LSN */ +#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */ +#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed LSN */ typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, void *tcb, @@ -184,11 +183,14 @@ typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, LSN *lsn, struct st_translog_parts *parts); -typedef int16(*read_rec_hook) (enum translog_record_type type, - int16 read_length, uchar *read_buff, - uchar *decoded_buff); +typedef uint16(*read_rec_hook) (enum translog_record_type type, + uint16 read_length, uchar *read_buff, + byte *decoded_buff); -/* Descriptor of log record type */ +/* + Descriptor of log record type + Note: Don't reorder because of constructs later... +*/ struct st_log_record_type_descriptor { /* internal class of the record */ @@ -207,9 +209,10 @@ struct st_log_record_type_descriptor For pseudo fixed records number of compressed LSNs followed by system header */ - int16 compresed_LSN; + int16 compressed_LSN; }; + static struct st_log_record_type_descriptor log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]= { @@ -343,6 +346,8 @@ static struct st_log_record_type_descriptor {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0} }; +/* all possible flags page overheads */ +static uint page_overhead[TRANSLOG_FLAGS_NUM]; typedef struct st_translog_validator_data { @@ -368,12 +373,13 @@ const char *maria_data_root; static char *translog_filename_by_fileno(uint32 file_no, char *path) { - char file_name[10 + 8 + 1]; + char file_name[10 + 8 + 1]; /* See my_sprintf */ char *res; DBUG_ENTER("translog_filename_by_fileno"); + DBUG_ASSERT(file_no <= 0xfffffff); my_sprintf(file_name, (file_name, "maria_log.%08u", file_no)); res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME)); - DBUG_PRINT("info", ("Path '%s', path: 0x%lx, res: 0x%lx", + DBUG_PRINT("info", ("Path: '%s' path: 0x%lx res: 0x%lx", res, (ulong) path, (ulong) res)); DBUG_RETURN(res); } @@ -387,8 +393,8 @@ static char *translog_filename_by_fileno(uint32 file_no, char *path) file_no Number of the log we want to open RETURN - 0 error - file descriptor number + -1 error + # file descriptor number */ static File open_logfile_by_number_no_cache(uint32 file_no) @@ -397,15 +403,15 @@ static File open_logfile_by_number_no_cache(uint32 file_no) char path[FN_REFLEN]; DBUG_ENTER("open_logfile_by_number_no_cache"); - if ((file= my_open(translog_filename_by_fileno(file_no, path), O_CREAT | O_BINARY | /* O_DIRECT - | - */ O_RDWR, + /* Todo: add O_DIRECT to open flags (when buffer is aligned) */ + if ((file= my_open(translog_filename_by_fileno(file_no, path), + O_CREAT | O_BINARY | O_RDWR, MYF(MY_WME))) < 0) { UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path)); - DBUG_RETURN(0); + DBUG_RETURN(-1); } - DBUG_PRINT("info", ("File '%s', handler %d", path, file)); + DBUG_PRINT("info", ("File: '%s' handler: %d", path, file)); DBUG_RETURN(file); } @@ -416,42 +422,50 @@ static File open_logfile_by_number_no_cache(uint32 file_no) SYNOPSIS translog_write_file_header(); + NOTES + First page is just a marker page; We don't store any real log data in it. + RETURN 0 OK 1 ERROR */ +uchar NEAR maria_trans_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A', + 'L', 'O', 'G' }; + static my_bool translog_write_file_header() { ulonglong timestamp; - char page[TRANSLOG_PAGE_SIZE]; + byte page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff; DBUG_ENTER("translog_write_file_header"); /* file tag */ - strnmov(page, "MARIALOG", 8); + memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic)); + page+= sizeof(maria_trans_file_magic); /* timestamp */ timestamp= my_getsystime(); - int8store(page + 8, timestamp); + int8store(page, timestamp); + page+= 8; /* maria version */ - int4store(page + (8 + 8), TRANSLOG_VERSION_ID); + int4store(page, TRANSLOG_VERSION_ID); + page+= 4; /* mysql version (MYSQL_VERSION_ID) */ - int4store(page + (8 + 8 + 4), log_descriptor.server_version); + int4store(page, log_descriptor.server_version); + page+= 4; /* server ID */ - int4store(page + (8 + 8 + 4 + 4), log_descriptor.server_id); - /* loghandler page size/512 */ - int2store(page + (8 + 8 + 4 + 4 + 4), TRANSLOG_PAGE_SIZE / 512); + int4store(page, log_descriptor.server_id); + page+= 4; + /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */ + int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE); + page+= 2; /* file number */ - int3store(page + (8 + 8 + 4 + 4 + 4 + 2), - LSN_FILE_NO(log_descriptor.horizon)); + int3store(page, LSN_FILE_NO(log_descriptor.horizon)); + page+= 3; + bzero(page, sizeof(page_buff) - (page- page_buff)); - bzero(page + (8 + 8 + 4 + 4 + 4 + 2 + 3), - TRANSLOG_PAGE_SIZE - (8 + 8 + 4 + 4 + 4 + 2 + 3)); - - if (my_pwrite(log_descriptor.log_file_num[0], page, - TRANSLOG_PAGE_SIZE, 0, MYF(MY_WME)) != TRANSLOG_PAGE_SIZE) - DBUG_RETURN(1); - - DBUG_RETURN(0); + DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff, + sizeof(page_buff), 0, MYF(MY_WME | MY_NABP)) != 0); } @@ -463,8 +477,8 @@ static my_bool translog_write_file_header() buffer The buffer to initialize RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) @@ -473,7 +487,7 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) /* This buffer offset */ buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; /* This Buffer File */ - buffer->file= 0; + buffer->file= -1; buffer->overlay= 0; /* IO cache for current log */ bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER); @@ -488,8 +502,6 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) /* lock for the buffer. Current buffer also lock the handler */ if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST)) DBUG_RETURN(1); - DBUG_PRINT("info", ("Init buffer #%u: 0x%lx", - (uint) buffer->buffer_no, (ulong) buffer)); DBUG_RETURN(0); } @@ -502,18 +514,23 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) file file descriptor RETURN - 0 OK - 1 Error + 0 OK + 1 Error */ static my_bool translog_close_log_file(File file) { - PAGECACHE_FILE fl= - { - file - }; + int rc; + PAGECACHE_FILE fl; + fl.file= file; flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE); - return test(my_close(file, MYF(MY_WME))); + /* + Sync file when we close it + TODO: sync only we have changed the log + */ + rc= my_sync(file, MYF(MY_WME)); + rc|= my_close(file, MYF(MY_WME)); + return test(rc); } @@ -532,20 +549,17 @@ static my_bool translog_create_new_file() { int i; uint32 file_no= LSN_FILE_NO(log_descriptor.horizon); - DBUG_ENTER("translog_create_new_file"); - if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] && + if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 && translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM - 1])) DBUG_RETURN(1); for (i= OPENED_FILES_NUM - 1; i > 0; i--) - { log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1]; - } if ((log_descriptor.log_file_num[0]= - open_logfile_by_number_no_cache(file_no)) <= 0 || + open_logfile_by_number_no_cache(file_no)) == -1 || translog_write_file_header()) DBUG_RETURN(1); @@ -565,8 +579,8 @@ static my_bool translog_create_new_file() buffer This buffer which should be locked RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ #ifndef DBUG_OFF @@ -574,21 +588,11 @@ static my_bool translog_buffer_lock(struct st_translog_buffer *buffer) { int res; DBUG_ENTER("translog_buffer_lock"); - DBUG_PRINT("enter", ("Lock buffer #%u (0x%lx): locked by:0x%lx, mutex: 0x%lx", - (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) &buffer->mutex)); + DBUG_PRINT("enter", + ("Lock buffer #%u: (0x%lx) mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); res= (pthread_mutex_lock(&buffer->mutex) != 0); -#ifndef DBUG_OFF - if (res == 0) - { - DBUG_ASSERT(buffer->locked_by == 0); - buffer->locked_by= my_thread_var; - } - else - DBUG_PRINT("error", ("Can't lock mutex 0x%lx (locked by0x%lx) errno: %d", - (ulong) &buffer->mutex, - (ulong) buffer->locked_by, res)); -#endif DBUG_RETURN(res); } #else @@ -605,8 +609,8 @@ static my_bool translog_buffer_lock(struct st_translog_buffer *buffer) buffer This buffer which should be unlocked RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ #ifndef DBUG_OFF @@ -614,16 +618,13 @@ static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) { int res; DBUG_ENTER("translog_buffer_unlock"); - DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) :locked by:0x%lx (0x%lx)," - " mutex: 0x%lx", + DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) " + "mutex: 0x%lx", (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) my_thread_var, (ulong) &buffer->mutex)); - DBUG_ASSERT(buffer->locked_by == my_thread_var); - buffer->locked_by= 0; res= (pthread_mutex_unlock(&buffer->mutex) != 0); - DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx, mutex: 0x%lx", + DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx mutex: 0x%lx", (uint) buffer->buffer_no, (ulong) buffer, (ulong) &buffer->mutex)); DBUG_RETURN(res); @@ -635,7 +636,7 @@ static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) /* - Write page header. + Write a header on the page SYNOPSIS translog_new_page_header() @@ -649,10 +650,10 @@ static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor) { - uchar *ptr; + byte *ptr; DBUG_ENTER("translog_new_page_header"); - DBUG_ASSERT(cursor->ptr !=NULL); + DBUG_ASSERT(cursor->ptr); cursor->protected= 0; @@ -663,41 +664,43 @@ static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, /* File number */ int3store(ptr, LSN_FILE_NO(*horizon)); ptr+= 3; - *(ptr ++)= (uchar) log_descriptor.flags; + *(ptr++)= (byte) log_descriptor.flags; if (log_descriptor.flags & TRANSLOG_PAGE_CRC) { #ifndef DBUG_OFF DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)", (ulong) LSN_FILE_NO(*horizon), (ulong) LSN_OFFSET(*horizon))); + /* This will be overwritten by real CRC; This is just for debugging */ int4store(ptr, 0x11223344); #endif - /* CRC will be put when page will be finished */ - ptr+= 4; + /* CRC will be put when page is finished */ + ptr+= CRC_LENGTH; } if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) { time_t tm; - int2store(ptr, time(&tm) & 0xFFFF); - ptr+= (TRANSLOG_PAGE_SIZE / 512) * 2; + uint16 tmp_time= time(&tm); + int2store(ptr, tmp_time); + ptr+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; } { - uint len= (ptr -cursor->ptr); - *horizon+= len; /* it is increasing of offset part of the address */ - cursor->current_page_size= len; + uint len= (ptr - cursor->ptr); + (*horizon)+= len; /* it is increasing of offset part of the address */ + cursor->current_page_fill= len; if (!cursor->chaser) cursor->buffer->size+= len; } cursor->ptr= ptr; - DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr -cursor->buffer->buffer) == + ((ulong) (cursor->ptr - cursor->buffer->buffer) == cursor->buffer->size)); DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); - DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_VOID_RETURN; } @@ -709,24 +712,29 @@ static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, translog_put_sector_protection() page reference on the page content cursor cursor of the buffer + + NOTES + We put a sector protection on all following sectors on the page, + except the first sector that is protected by page header. */ -static void translog_put_sector_protection(uchar *page, +static void translog_put_sector_protection(byte *page, struct st_buffer_cursor *cursor) { - uchar *table= page + log_descriptor.page_overhead - - (TRANSLOG_PAGE_SIZE / 512) * 2; + byte *table= page + log_descriptor.page_overhead - + (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; uint16 value= uint2korr(table) + cursor->write_counter; - uint16 last_protected_sector= (cursor->previous_offset - 1) / 512; - uint16 start_sector= cursor->previous_offset / 512; + uint16 last_protected_sector= ((cursor->previous_offset - 1) / + DISK_DRIVE_SECTOR_SIZE); + uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE; uint i, offset; - DBUG_ENTER("translog_put_sector_protection"); - if (start_sector == 0) - start_sector= 1; - DBUG_PRINT("enter", ("Write counter %u, value %u, offset %u, " - "last protected %u, start sector %u", + if (start_sector == 0) + start_sector= 1; /* First sector is protected */ + + DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, " + "last protected:%u start sector:%u", (uint) cursor->write_counter, (uint) value, (uint) cursor->previous_offset, @@ -734,7 +742,7 @@ static void translog_put_sector_protection(uchar *page, if (last_protected_sector == start_sector) { i= last_protected_sector * 2; - offset= last_protected_sector * 512; + offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE; /* restore data, because we modified sector which was protected */ if (offset < cursor->previous_offset) page[offset]= table[i]; @@ -742,17 +750,17 @@ static void translog_put_sector_protection(uchar *page, if (offset < cursor->previous_offset) page[offset]= table[i + 1]; } - for (i= start_sector * 2, offset= start_sector * 512; - i < (TRANSLOG_PAGE_SIZE / 512) * 2; (i+= 2), (offset+= 512)) + for (i= start_sector * 2, offset= start_sector * DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + (i+= 2), (offset+= DISK_DRIVE_SECTOR_SIZE)) { - DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x", + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", i / 2, offset, (uint) page[offset], (uint) page[offset + 1])); table[i]= page[offset]; table[i + 1]= page[offset + 1]; - /**((uint16 *)(table + i))= *((uint16* )(page + offset));*/ int2store(page + offset, value); - DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x", + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", i / 2, offset, (uint) page[offset], (uint) page[offset + 1])); } @@ -761,42 +769,20 @@ static void translog_put_sector_protection(uchar *page, /* - Calculate adler CRC of given area + Calculate CRC32 of given area SYNOPSIS - translog_adler_crc() + translog_crc() area Pointer of the area beginning length The Area length RETURN - Adler CRC32 + CRC32 */ -uint32 translog_adler_crc(uchar *area, uint length) +static uint32 translog_crc(byte *area, uint length) { - uint32 a= 1, b= 0; -#define MOD_ADLER 65521 - - while (length) - { - uint tlen= length > 5550 ? 5550 : length; - length-= tlen; - do - { - a+= *area++; - b+= a; - } while (--tlen); - a= (a & 0xffff) + (a >> 16) * (65536 - MOD_ADLER); - b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER); - } - /* It can be shown that a <= 0x1013a here, so a single subtract will do. */ - if (a >= MOD_ADLER) - a-= MOD_ADLER; - /* It can be shown that b can reach 0xffef1 here. */ - b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER); - if (b >= MOD_ADLER) - b-= MOD_ADLER; - return (b << 16) | a; + return crc32(0L, area, length); } @@ -812,26 +798,25 @@ uint32 translog_adler_crc(uchar *area, uint length) static void translog_finish_page(TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor) { - uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_size; - uchar *page= cursor->ptr -cursor->current_page_size; + uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill; + byte *page= cursor->ptr -cursor->current_page_fill; DBUG_ENTER("translog_finish_page"); - - DBUG_PRINT("enter", ("Buffer #%u 0x%lx, " - "Buffer addr (%lu,0x%lx), " - "Page addr: (%lu,0x%lx), " - "size %lu (%lu), Pg: %u, left: %u", + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "size:%lu (%lu) Pg:%u left:%u", (uint) cursor->buffer_no, (ulong) cursor->buffer, (ulong) LSN_FILE_NO(cursor->buffer->offset), (ulong) LSN_OFFSET(cursor->buffer->offset), (ulong) LSN_FILE_NO(*horizon), (ulong) (LSN_OFFSET(*horizon) - - cursor->current_page_size), + cursor->current_page_fill), (ulong) cursor->buffer->size, (ulong) (cursor->ptr -cursor->buffer->buffer), - (uint) cursor->current_page_size, (uint) left)); + (uint) cursor->current_page_fill, (uint) left)); DBUG_ASSERT(cursor->ptr !=NULL); DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == - cursor->current_page_size % TRANSLOG_PAGE_SIZE); + cursor->current_page_fill % TRANSLOG_PAGE_SIZE); DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); DBUG_ASSERT(LSN_OFFSET(cursor->buffer->offset) + (cursor->ptr -cursor->buffer->buffer) == LSN_OFFSET(*horizon)); @@ -840,48 +825,51 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, DBUG_PRINT("info", ("Already protected and finished")); DBUG_VOID_RETURN; } - if (left != TRANSLOG_PAGE_SIZE && left != 0) + cursor->protected= 1; + + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) { - DBUG_PRINT("info", ("left %u", (uint) left)); + DBUG_PRINT("info", ("left: %u", (uint) left)); bzero(cursor->ptr, left); cursor->ptr +=left; - *horizon+= left; /* offset increasing */ + (*horizon)+= left; /* offset increasing */ if (!cursor->chaser) cursor->buffer->size+= left; - cursor->current_page_size= 0; - DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, " - "chaser: %d, Size: %lu (%lu)", + cursor->current_page_fill= 0; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); DBUG_ASSERT(cursor->chaser - || ((ulong) (cursor->ptr -cursor->buffer->buffer) == + || ((ulong) (cursor->ptr - cursor->buffer->buffer) == cursor->buffer->size)); DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); } - if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) { translog_put_sector_protection(page, cursor); DBUG_PRINT("info", ("drop write_counter")); cursor->write_counter= 0; cursor->previous_offset= 0; } - if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) { - uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead, - TRANSLOG_PAGE_SIZE - - log_descriptor.page_overhead); - DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); + uint32 crc= translog_crc(page + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: %lx", (ulong) crc)); + /* We have page number, file number and flag before crc */ int4store(page + 3 + 3 + 1, crc); } - cursor->protected= 1; DBUG_VOID_RETURN; } /* - Wait until all thread finish filling this buffer + Wait until all thread finish filling this buffer SYNOPSIS translog_wait_for_writers() @@ -893,46 +881,28 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, static void translog_wait_for_writers(struct st_translog_buffer *buffer) { - struct st_my_thread_var *thread; + struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("translog_wait_for_writers"); - DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u", + DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u", (uint) buffer->buffer_no, (ulong) buffer, (int) buffer->copy_to_buffer_in_progress)); - if (!buffer->copy_to_buffer_in_progress) - DBUG_VOID_RETURN; - - thread= my_thread_var; - - DBUG_ASSERT(buffer->file != 0); - - do + while (buffer->copy_to_buffer_in_progress) { - DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " - "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " "mutex: 0x%lx", - (ulong) thread, (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); -#ifndef DBUG_OFF - DBUG_ASSERT(buffer->locked_by == thread); - buffer->locked_by= 0; -#endif + DBUG_ASSERT(buffer->file != -1); wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, &buffer->mutex); - DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " - "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + DBUG_PRINT("info", ("wait for writers done " + "buffer: #%u 0x%lx " "mutex: 0x%lx", - (ulong) thread, (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); -#ifndef DBUG_OFF - DBUG_ASSERT(buffer->locked_by == 0); - buffer->locked_by= thread; -#endif - } while (buffer->copy_to_buffer_in_progress != 0); + } DBUG_VOID_RETURN; } @@ -940,11 +910,11 @@ static void translog_wait_for_writers(struct st_translog_buffer *buffer) /* - Wait for this buffer become free + Wait for buffer to become free SYNOPSIS translog_wait_for_buffer_free() - buffer The buffer to initialize + buffer The buffer we are waiting for NOTE - this buffer should be locked @@ -954,51 +924,36 @@ static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("translog_wait_for_buffer_free"); - DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u size 0x%lu", + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx copies in progress: %u " + "File: %d size: 0x%lu", (uint) buffer->buffer_no, (ulong) buffer, (int) buffer->copy_to_buffer_in_progress, - (ulong) buffer->size)); + buffer->file, (ulong) buffer->size)); translog_wait_for_writers(buffer); - if (!buffer->file) - DBUG_VOID_RETURN; - - thread= my_thread_var; - - do + while (buffer->file != -1) { - DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, " - "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " "mutex: 0x%lx", - (ulong) thread, (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); -#ifndef DBUG_OFF - DBUG_ASSERT(buffer->locked_by == thread); - buffer->locked_by= 0; -#endif wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, &buffer->mutex); - DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, " - "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), " + DBUG_PRINT("info", ("wait for writers done. " + "buffer: #%u 0x%lx " "mutex: 0x%lx", - (ulong) thread, (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) thread, (ulong) &buffer->mutex)); -#ifndef DBUG_OFF - DBUG_ASSERT(buffer->locked_by == 0); - buffer->locked_by= thread; -#endif - } while (buffer->copy_to_buffer_in_progress != 0); + } + DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0); DBUG_VOID_RETURN; } /* - Set cursor on the buffer beginning + Initialize the cursor for a buffer SYNOPSIS translog_cursor_init() @@ -1015,9 +970,8 @@ static void translog_cursor_init(struct st_buffer_cursor *cursor, cursor->ptr= buffer->buffer; cursor->buffer= buffer; cursor->buffer_no= buffer_no; - cursor->current_page_size= 0; + cursor->current_page_fill= 0; cursor->chaser= (cursor != &log_descriptor.bc); - DBUG_PRINT("info", ("drop write_counter")); cursor->write_counter= 0; cursor->previous_offset= 0; cursor->protected= 0; @@ -1037,13 +991,13 @@ static void translog_cursor_init(struct st_buffer_cursor *cursor, static void translog_start_buffer(struct st_translog_buffer *buffer, struct st_buffer_cursor *cursor, - uint8 buffer_no) + uint buffer_no) { DBUG_ENTER("translog_start_buffer"); DBUG_PRINT("enter", - ("Assign buffer #%u (0x%lx) to file %u, offset 0x%lx(%lu)", + ("Assign buffer: #%u (0x%lx) to file: %d offset: 0x%lx(%lu)", (uint) buffer->buffer_no, (ulong) buffer, - (uint) log_descriptor.log_file_num[0], + log_descriptor.log_file_num[0], (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT(buffer_no == buffer->buffer_no); @@ -1053,12 +1007,12 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, buffer->overlay= 0; buffer->size= 0; translog_cursor_init(cursor, buffer, buffer_no); - DBUG_PRINT("info", ("init cursor #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("init cursor #%u: 0x%lx chaser: %d Size: %lu (%lu)", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr -cursor->buffer->buffer) == + ((ulong) (cursor->ptr - cursor->buffer->buffer) == cursor->buffer->size)); DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); DBUG_VOID_RETURN; @@ -1079,21 +1033,21 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, - after return new and old buffer still are locked RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor, my_bool new_file) { - uint8 old_buffer_no= cursor->buffer_no; - uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + uint old_buffer_no= cursor->buffer_no; + uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; my_bool chasing= cursor->chaser; DBUG_ENTER("translog_buffer_next"); - DBUG_PRINT("info", ("horizon (%lu,0x%lx), chasing: %d", + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), chasing)); @@ -1113,8 +1067,8 @@ static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, if (new_file) { /* move the horizon to the next file and its header page */ - *horizon+= LSN_ONE_FILE; - *horizon= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE); + (*horizon)+= LSN_ONE_FILE; + (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE); if (!chasing && translog_create_new_file()) { DBUG_RETURN(1); @@ -1177,22 +1131,17 @@ static void translog_get_sent_to_file(LSN *lsn) RETURN first chunk offset - 0 - Error */ -static my_bool translog_get_first_chunk_offset(uchar *page) +static my_bool translog_get_first_chunk_offset(byte *page) { uint16 page_header= 7; DBUG_ENTER("translog_get_first_chunk_offset"); - if (page[6] & TRANSLOG_PAGE_CRC) - { + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) page_header+= 4; - } - if (page[6] & TRANSLOG_SECTOR_PROTECTION) - { - page_header+= (TRANSLOG_PAGE_SIZE / 512) * 2; - } + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + page_header+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; DBUG_RETURN(page_header); } @@ -1208,7 +1157,7 @@ static my_bool translog_get_first_chunk_offset(uchar *page) */ static void -translog_write_variable_record_1group_code_len(uchar *dst, +translog_write_variable_record_1group_code_len(byte *dst, translog_size_t length, uint16 header_len) { @@ -1223,7 +1172,7 @@ translog_write_variable_record_1group_code_len(uchar *dst, int2store(dst + 1, length); return; case 9: /* (5 + 4) */ - DBUG_ASSERT(length <= 0xFFFFFF); + DBUG_ASSERT(length <= (ulong) 0xFFFFFF); *dst= 252; int3store(dst + 1, length); return; @@ -1249,18 +1198,18 @@ translog_write_variable_record_1group_code_len(uchar *dst, decoded length */ -static translog_size_t translog_variable_record_1group_decode_len(uchar **src) +static translog_size_t translog_variable_record_1group_decode_len(byte **src) { uint8 first= (uint8) (**src); switch (first) { case 251: - *src+= 3; + (*src)+= 3; return (uint2korr((*src) - 2)); case 252: - *src+= 4; + (*src)+= 4; return (uint3korr((*src) - 3)); case 253: - *src+= 5; + (*src)+= 5; return (uint4korr((*src) - 4)); case 254: case 255: @@ -1283,25 +1232,24 @@ static translog_size_t translog_variable_record_1group_decode_len(uchar **src) RETURN total length of the chunk - 0 - Error */ -static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) +static uint16 translog_get_total_chunk_length(byte *page, uint16 offset) { DBUG_ENTER("translog_get_total_chunk_length"); switch (page[offset] & TRANSLOG_CHUNK_TYPE) { - case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN - (head or tail) */ + case TRANSLOG_CHUNK_LSN: { + /* 0 chunk referred as LSN (head or tail) */ translog_size_t rec_len; - uchar *start= page + offset; - uchar *ptr= start + 1 + 2; + byte *start= page + offset; + byte *ptr= start + 1 + 2; uint16 chunk_len, header_len, page_rest; DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); rec_len= translog_variable_record_1group_decode_len(&ptr); chunk_len= uint2korr(ptr); - header_len= (ptr -start) +2; - DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u", + header_len= (ptr -start) + 2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", (ulong) rec_len, (uint) chunk_len, (uint) header_len)); if (chunk_len) { @@ -1317,11 +1265,14 @@ static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) DBUG_RETURN(page_rest); break; } - case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also - LSN) */ + case TRANSLOG_CHUNK_FIXED: { - DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED")); + byte *ptr; uint type= page[offset] & TRANSLOG_REC_TYPE; + uint length; + int i; + /* 1 (pseudo)fixed record (also LSN) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED")); DBUG_ASSERT(log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH || log_record_type_descriptor[type].class == @@ -1334,32 +1285,31 @@ static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3); } { - uchar *ptr= page + offset + 3; /* first compressed LSN */ - int i= 0; - uint length= log_record_type_descriptor[type].fixed_length + 3; - for (; i < log_record_type_descriptor[type].compresed_LSN; i++) + ptr= page + offset + 3; /* first compressed LSN */ + length= log_record_type_descriptor[type].fixed_length + 3; + for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++) { /* first 2 bits is length - 2 */ uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2; ptr+= len; - length-= (TRANSLOG_CLSN_MAX_LEN - len); /* subtract economized - bytes */ + /* subtract economized bytes */ + length-= (TRANSLOG_CLSN_MAX_LEN - len); } DBUG_PRINT("info", ("Pseudo-fixed length: %u", length)); DBUG_RETURN(length); } break; } - case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page - end) */ - DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR, length: %u", + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u", (uint) (TRANSLOG_PAGE_SIZE - offset))); DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset); break; case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH")); DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3); - DBUG_PRINT("info", ("Length %u", uint2korr(page + offset + 1) + 3)); + DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3)); DBUG_RETURN(uint2korr(page + offset + 1) + 3); break; default: @@ -1376,8 +1326,8 @@ static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset) buffer This buffer should be flushed RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) @@ -1385,20 +1335,18 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) uint32 i; DBUG_ENTER("translog_buffer_flush"); DBUG_PRINT("enter", - ("Buffer #%u 0x%lx: locked by 0x%lx (0x%lx), " - "file: %u, offset (%lu,0x%lx), size %lu", + ("Buffer: #%u 0x%lx: " + "file: %d offset: (%lu,0x%lx) size: %lu", (uint) buffer->buffer_no, (ulong) buffer, - (ulong) buffer->locked_by, (ulong) my_thread_var, - (uint) buffer->file, + buffer->file, (ulong) LSN_FILE_NO(buffer->offset), (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size)); - DBUG_ASSERT(buffer->locked_by == my_thread_var); - DBUG_ASSERT(buffer->file != 0); + DBUG_ASSERT(buffer->file != -1); translog_wait_for_writers(buffer); - if (buffer->overlay && buffer->overlay->file) + if (buffer->overlay && buffer->overlay->file != -1) { struct st_translog_buffer *overlay= buffer->overlay; translog_buffer_unlock(buffer); @@ -1410,10 +1358,8 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE) { - PAGECACHE_FILE file= - { - buffer->file - }; + PAGECACHE_FILE file; + file.file= buffer->file; if (pagecache_write(log_descriptor.pagecache, &file, (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE, @@ -1423,16 +1369,17 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0)) { - UNRECOVERABLE_ERROR(("Cant't write page (%lu,0x%lx) to pagecacte", + UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache", (ulong) buffer->file, (ulong) (LSN_OFFSET(buffer->offset)+ i))); } } if (my_pwrite(buffer->file, (char*) buffer->buffer, buffer->size, LSN_OFFSET(buffer->offset), - MYF(MY_WME)) != buffer->size) + MYF(MY_WME | MY_NABP))) { - UNRECOVERABLE_ERROR(("Cant't buffer (%lu,0x%lx) size %lu to the disk (%d)", + UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu " + "to the disk (%d)", (ulong) buffer->file, (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size, errno)); @@ -1440,10 +1387,11 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) } if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */ translog_set_sent_to_file(&buffer->last_lsn); + /* Free buffer */ - buffer->file= 0; + buffer->file= -1; buffer->overlay= 0; - if (buffer->waiting_filling_buffer.last_thread != NULL) + if (buffer->waiting_filling_buffer.last_thread) { wqueue_release_queue(&buffer->waiting_filling_buffer); } @@ -1460,20 +1408,17 @@ static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) offset offset of failed sector RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) +static my_bool translog_recover_page_up_to_sector(byte *page, uint16 offset) { uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end; DBUG_ENTER("translog_recover_page_up_to_sector"); - DBUG_PRINT("enter", ("offset %u, first chunk %u", + DBUG_PRINT("enter", ("offset: %u first chunk: %u", (uint) offset, (uint) chunk_offset)); - if (chunk_offset == 0) - DBUG_RETURN(1); - while (page[chunk_offset] != '\0' && chunk_offset < offset) { uint16 chunk_length; @@ -1484,11 +1429,11 @@ static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) (uint) chunk_offset)); DBUG_RETURN(1); } - DBUG_PRINT("info", ("chunk: offset: %u, length %u", + DBUG_PRINT("info", ("chunk: offset: %u length %u", (uint) chunk_offset, (uint) chunk_length)); if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE) { - UNRECOVERABLE_ERROR(("demaged chunk (offset %u) in trusted area", + UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area", (uint) chunk_offset)); DBUG_RETURN(1); } @@ -1496,21 +1441,20 @@ static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) } valid_chunk_end= chunk_offset; - /*end of trusted area - sector parsing */ + /* end of trusted area - sector parsing */ while (page[chunk_offset] != '\0') { uint16 chunk_length; if ((chunk_length= translog_get_total_chunk_length(page, chunk_offset)) == 0) - { break; - } - DBUG_PRINT("info", ("chunk: offset: %u, length %u", + + DBUG_PRINT("info", ("chunk: offset: %u length %u", (uint) chunk_offset, (uint) chunk_length)); - if (((ulong) chunk_offset) + ((ulong) chunk_length) > (uint) (offset + 512)) - { + if (((ulong) chunk_offset) + ((ulong) chunk_length) > + (uint) (offset + DISK_DRIVE_SECTOR_SIZE)) break; - } + chunk_offset+= chunk_length; valid_chunk_end= chunk_offset; } @@ -1531,23 +1475,25 @@ static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) data data, need for validation (address in this case) RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -static my_bool translog_page_validator(byte *page_addr, gptr data) +static my_bool translog_page_validator(byte *page_addr, gptr data_ptr) { - uint8 flags; - uchar *page= (uchar*) page_addr; + uint this_page_page_overhead; + uint flags; + byte *page= (byte*) page_addr, *page_pos; + TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr; + TRANSLOG_ADDRESS addr= *(data->addr); DBUG_ENTER("translog_page_validator"); - TRANSLOG_ADDRESS addr= *((TRANSLOG_VALIDATOR_DATA*) data)->addr; - ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 0; + data->was_recovered= 0; if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE || uint3korr(page + 3) != LSN_FILE_NO(addr)) { UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " - "page address written in the page is incorrect :" + "page address written in the page is incorrect: " "File %lu instead of %lu or page %lu instead of %lu", (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr), @@ -1555,7 +1501,8 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE)); DBUG_RETURN(1); } - flags= page[3 + 3]; + flags= (uint)(page[TRANSLOG_PAGE_FLAGS]); + this_page_page_overhead= page_overhead[flags]; if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | TRANSLOG_RECORD_CRC)) { @@ -1565,65 +1512,58 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) (uint) flags)); DBUG_RETURN(1); } + page_pos= page + (3 + 3 + 1); if (flags & TRANSLOG_PAGE_CRC) { - uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead, - TRANSLOG_PAGE_SIZE - - log_descriptor.page_overhead); - if (crc != uint4korr(page + 3 + 3 + 1)) + uint32 crc= translog_crc(page + this_page_page_overhead, + TRANSLOG_PAGE_SIZE - + this_page_page_overhead); + if (crc != uint4korr(page_pos)) { UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " "CRC mismatch: calculated: %lx on the page %lx", (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), - (ulong) crc, (ulong) uint4korr(page + 3 + 3 + 1))); + (ulong) crc, (ulong) uint4korr(page_pos))); DBUG_RETURN(1); } + page_pos+= CRC_LENGTH; /* Skip crc */ } if (flags & TRANSLOG_SECTOR_PROTECTION) { uint i, offset; - uchar *table= (page + 3 + 3 + 1 + ((flags & TRANSLOG_PAGE_CRC) ? 4 : 0)); + byte *table= page_pos; uint16 current= uint2korr(table); - for (i= 2, offset= 512; - i < (TRANSLOG_PAGE_SIZE / 512) * 2; i+= 2, offset+= 512) + for (i= 2, offset= DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + i+= 2, offset+= DISK_DRIVE_SECTOR_SIZE) { /* - TODO: add cunk counting for "suspecting" sectors (difference is - more that 1-2) + TODO: add chunk counting for "suspecting" sectors (difference is + more than 1-2) */ uint16 test= uint2korr(page + offset); - DBUG_PRINT("info", ("sector #%u offset %u current %lx " - "read 0x%x stored 0x%x%x", + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", i / 2, offset, (ulong) current, (uint) uint2korr(page + offset), (uint) table[i], (uint) table[i + 1])); - if (test < current) - { - if (0xFFFFLL - current + test > 512 / 3) - { - /* it is not overflow */ - if (translog_recover_page_up_to_sector(page, offset)) - DBUG_RETURN(1); - ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1; - DBUG_RETURN(0); - } - } - else if (test - current > 512 / 3) + if (((test < current) && + (LL(0xFFFF) - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) || + ((test >= current) && + (test - current > DISK_DRIVE_SECTOR_SIZE / 3))) { if (translog_recover_page_up_to_sector(page, offset)) DBUG_RETURN(1); - ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1; + data->was_recovered= 1; DBUG_RETURN(0); } /* Return value on the page */ page[offset]= table[i]; page[offset + 1]= table[i + 1]; - /**((uint16*)page + offset)= *((uint16*)(table + i));*/ - current= test; - DBUG_PRINT("info", ("sector #%u offset %u current %lx " - "read 0x%x stored 0x%x%x", + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", i / 2, offset, (ulong) current, (uint) uint2korr(page + offset), (uint) table[i], (uint) table[i + 1])); @@ -1643,17 +1583,17 @@ static my_bool translog_page_validator(byte *page_addr, gptr data) (might not be used in some cache implementations) RETURN - pointer to the page cache which should be used to read this page NULL - Error + # pointer to the page cache which should be used to read this page */ -static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) +static byte *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, byte *buffer) { TRANSLOG_ADDRESS addr= *(data->addr); uint cache_index; uint32 file_no= LSN_FILE_NO(addr); DBUG_ENTER("translog_get_page"); - DBUG_PRINT("enter", ("File %lu, Offset %lu(0x%lx)", + DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)", (ulong) file_no, (ulong) LSN_OFFSET(addr), (ulong) LSN_OFFSET(addr))); @@ -1666,17 +1606,15 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) { PAGECACHE_FILE file; /* file in the cache */ - if (log_descriptor.log_file_num[cache_index] == 0) + if (log_descriptor.log_file_num[cache_index] == -1) { if ((log_descriptor.log_file_num[cache_index]= - open_logfile_by_number_no_cache(file_no)) == 0) - { + open_logfile_by_number_no_cache(file_no)) == -1) DBUG_RETURN(NULL); - } } file.file= log_descriptor.log_file_num[cache_index]; - buffer= (uchar*) + buffer= (byte*) pagecache_valid_read(log_descriptor.pagecache, &file, LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, 3, (char*) buffer, @@ -1686,7 +1624,17 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) } else { + /* + TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT + THE LAST USED FILES. THIS WILL BE A NOTABLE PROBLEM IF WE ARE + FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES. WE WILL + PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG + FILES. + */ + File file= open_logfile_by_number_no_cache(file_no); + if (file == -1) + DBUG_RETURN(NULL); if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE, LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME))) buffer= NULL; @@ -1708,8 +1656,8 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) last_page_ok assigned 1 if last page was OK RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, @@ -1721,11 +1669,10 @@ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, uint32 file_no= LSN_FILE_NO(*addr); DBUG_ENTER("translog_get_last_page_addr"); - if ((stat= my_stat (translog_filename_by_fileno(file_no, - path), - &stat_buff, MYF(MY_WME))) == NULL) + if (!(stat= my_stat(translog_filename_by_fileno(file_no, path), + &stat_buff, MYF(MY_WME)))) DBUG_RETURN(1); - DBUG_PRINT("info", ("File size %lu", (ulong) stat->st_size)); + DBUG_PRINT("info", ("File size: %lu", (ulong) stat->st_size)); if (stat->st_size > TRANSLOG_PAGE_SIZE) { rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * @@ -1738,7 +1685,7 @@ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, rec_offset= 0; } *addr= MAKE_LSN(file_no, rec_offset); - DBUG_PRINT("info", ("Last page: 0x%lx, ok %d", (ulong) rec_offset, + DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset, *last_page_ok)); DBUG_RETURN(0); } @@ -1759,9 +1706,9 @@ static uint translog_variable_record_length_bytes(translog_size_t length) { if (length < 250) return 1; - else if (length < 0xFFFF) + if (length < 0xFFFF) return 3; - else if (length < 0xFFFFFF) + if (length < (ulong) 0xFFFFFF) return 4; return 5; } @@ -1776,47 +1723,49 @@ static uint translog_variable_record_length_bytes(translog_size_t length) offset Offset of the chunk on this place RETURN - total length of the chunk - 0 - Error + # total length of the chunk + 0 Error */ -static uint16 translog_get_chunk_header_length(uchar *page, uint16 offset) +static uint16 translog_get_chunk_header_length(byte *page, uint16 offset) { DBUG_ENTER("translog_get_chunk_header_length"); - switch (page[offset] & TRANSLOG_CHUNK_TYPE) { - case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN - (head or tail) */ + page+= offset; + switch (*page & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: { + /* 0 chunk referred as LSN (head or tail) */ translog_size_t rec_len; - uchar *start= page + offset; - uchar *ptr= start + 1 + 2; + byte *start= page; + byte *ptr= start + 1 + 2; uint16 chunk_len, header_len; DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); rec_len= translog_variable_record_1group_decode_len(&ptr); chunk_len= uint2korr(ptr); - header_len= (ptr -start) +2; - DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u", + header_len= (ptr - start) +2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", (ulong) rec_len, (uint) chunk_len, (uint) header_len)); if (chunk_len) { - /*TODO: fine header end */ + /* TODO: fine header end */ DBUG_ASSERT(0); } DBUG_RETURN(header_len); break; } - case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also - LSN) */ + case TRANSLOG_CHUNK_FIXED: { + /* 1 (pseudo)fixed record (also LSN) */ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3")); DBUG_RETURN(3); } - case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page - end) */ + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1")); DBUG_RETURN(1); break; - case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ + case TRANSLOG_CHUNK_LNGTH: + /* 3 chunk with chunk length */ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3")); DBUG_RETURN(3); break; @@ -1840,8 +1789,8 @@ static uint16 translog_get_chunk_header_length(uchar *page, uint16 offset) TRANSLOG_RECORD_CRC) RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ my_bool translog_init(const char *directory, @@ -1851,11 +1800,13 @@ my_bool translog_init(const char *directory, { int i; int old_log_was_recovered= 0, logs_found= 0; + uint old_flags= flags; TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; DBUG_ENTER("translog_init"); - if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, MY_MUTEX_INIT_FAST)) + if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, + MY_MUTEX_INIT_FAST)) DBUG_RETURN(1); /* Directory to store files */ @@ -1883,11 +1834,16 @@ my_bool translog_init(const char *directory, ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | TRANSLOG_RECORD_CRC)) == 0); log_descriptor.flags= flags; - log_descriptor.page_overhead= 7; - if (flags & TRANSLOG_PAGE_CRC) - log_descriptor.page_overhead+= 4; - if (flags & TRANSLOG_SECTOR_PROTECTION) - log_descriptor.page_overhead+= (TRANSLOG_PAGE_SIZE / 512) * 2; + for (i= 0; i < TRANSLOG_FLAGS_NUM; i++) + { + page_overhead[i]= 7; + if (i & TRANSLOG_PAGE_CRC) + page_overhead[i]+= CRC_LENGTH; + if (i & TRANSLOG_SECTOR_PROTECTION) + page_overhead[i]+= (TRANSLOG_PAGE_SIZE / + DISK_DRIVE_SECTOR_SIZE) * 2; + } + log_descriptor.page_overhead= page_overhead[flags]; log_descriptor.page_capacity_chunk_2= TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1; DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0); @@ -1897,7 +1853,7 @@ my_bool translog_init(const char *directory, log_descriptor.half_buffer_capacity_chunk_2= log_descriptor.buffer_capacity_chunk_2 / 2; DBUG_PRINT("info", - ("Overhead: %u, pc2: %u, bc2: %u, bc2/2: %u", + ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u", log_descriptor.page_overhead, log_descriptor.page_capacity_chunk_2, log_descriptor.buffer_capacity_chunk_2, @@ -1907,9 +1863,7 @@ my_bool translog_init(const char *directory, /* Init log handler file handlers cache */ for (i= 0; i < OPENED_FILES_NUM; i++) - { - log_descriptor.log_file_num[i]= 0; - } + log_descriptor.log_file_num[i]= -1; /* just to init it somehow */ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); @@ -1917,12 +1871,13 @@ my_bool translog_init(const char *directory, /* Buffers for log writing */ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) { -#ifndef DBUG_OFF - log_descriptor.buffers[i].buffer_no= (uint8) i; - log_descriptor.buffers[i].locked_by= NULL; -#endif if (translog_buffer_init(log_descriptor.buffers + i)) DBUG_RETURN(1); +#ifndef DBUG_OFF + log_descriptor.buffers[i].buffer_no= (uint8) i; +#endif + DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx", + i, (ulong) log_descriptor.buffers + i)); } logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); @@ -1931,12 +1886,12 @@ my_bool translog_init(const char *directory, { my_bool pageok; /* - TODO: scan directory for maria_log.XXXXXXXX files and find + TODO: scan directory for maria_log.XXXXXXXX files and find highest XXXXXXXX & set logs_found - */ + TODO: check that last checkpoint within present log addresses space - /* TODO: check that last checkpoint within present log addresses space */ - /* find the log end */ + find the log end + */ if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) { DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); @@ -1994,17 +1949,16 @@ my_bool translog_init(const char *directory, } do { - TRANSLOG_VALIDATOR_DATA data= - { - ¤t_page, 0 - }; - uchar buffer[TRANSLOG_PAGE_SIZE], *page; + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; + data.addr= ¤t_page; if ((page= translog_get_page(&data, buffer)) == NULL) DBUG_RETURN(1); if (data.was_recovered) { - DBUG_PRINT("error", ("file no %u (%d), rec_offset 0x%lx (%lu) (%d)", - (uint) LSN_FILE_NO(current_page), + DBUG_PRINT("error", ("file no: %lu (%d) " + "rec_offset: 0x%lx (%lu) (%d)", + (ulong) LSN_FILE_NO(current_page), (uint3korr(page + 3) != LSN_FILE_NO(current_page)), (ulong) LSN_OFFSET(current_page), @@ -2016,6 +1970,7 @@ my_bool translog_init(const char *directory, old_log_was_recovered= 1; break; } + old_flags= page[TRANSLOG_PAGE_FLAGS]; last_valid_page= current_page; current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */ } while (current_page <= current_file_last_page); @@ -2029,27 +1984,27 @@ my_bool translog_init(const char *directory, /* TODO: issue error */ DBUG_RETURN(1); } - DBUG_PRINT("info", ("Last valid page is in file %lu offset %lu (0x%lx), " - "Logs found: %d, was recovered: %d", + DBUG_PRINT("info", ("Last valid page is in file: %lu " + "offset: %lu (0x%lx) " + "Logs found: %d was recovered: %d " + "flags match: %d", (ulong) LSN_FILE_NO(last_valid_page), (ulong) LSN_OFFSET(last_valid_page), (ulong) LSN_OFFSET(last_valid_page), - logs_found, old_log_was_recovered)); + logs_found, old_log_was_recovered, + (old_flags == flags))); /* TODO: check server ID */ - if (logs_found && !old_log_was_recovered) + if (logs_found && !old_log_was_recovered && old_flags == flags) { - TRANSLOG_VALIDATOR_DATA data= - { - &last_valid_page, 0 - }; - uchar buffer[TRANSLOG_PAGE_SIZE], *page; + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; uint16 chunk_offset; + data.addr= &last_valid_page; /* continue old log */ DBUG_ASSERT(LSN_FILE_NO(last_valid_page)== LSN_FILE_NO(log_descriptor.horizon)); - if ((page= translog_get_page(&data, - buffer)) == NULL || + if ((page= translog_get_page(&data, buffer)) == NULL || (chunk_offset= translog_get_first_chunk_offset(page)) == 0) DBUG_RETURN(1); @@ -2066,38 +2021,37 @@ my_bool translog_init(const char *directory, if ((chunk_length= translog_get_total_chunk_length(page, chunk_offset)) == 0) DBUG_RETURN(1); - DBUG_PRINT("info", ("chunk: offset: %u, length %u", + DBUG_PRINT("info", ("chunk: offset: %u length: %u", (uint) chunk_offset, (uint) chunk_length)); chunk_offset+= chunk_length; /* chunk can't cross the page border */ DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE); } - memmove(log_descriptor.buffers->buffer, page, chunk_offset); + memcpy(log_descriptor.buffers->buffer, page, chunk_offset); log_descriptor.bc.buffer->size+= chunk_offset; log_descriptor.bc.ptr+= chunk_offset; - log_descriptor.bc.current_page_size= chunk_offset; + log_descriptor.bc.current_page_fill= chunk_offset; log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, (chunk_offset + LSN_OFFSET(last_valid_page))); - DBUG_PRINT("info", ("Move Page #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)", (uint) log_descriptor.bc.buffer_no, (ulong) log_descriptor.bc.buffer, log_descriptor.bc.chaser, (ulong) log_descriptor.bc.buffer->size, - (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + (ulong) (log_descriptor.bc.ptr - log_descriptor.bc. buffer->buffer))); - DBUG_ASSERT(log_descriptor.bc.chaser - || + DBUG_ASSERT(log_descriptor.bc.chaser || ((ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == log_descriptor.bc.buffer->size)); DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == log_descriptor.bc.buffer_no); - DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); } } - DBUG_PRINT("info", ("Logs found: %d, was recovered %d", + DBUG_PRINT("info", ("Logs found: %d was recovered: %d", logs_found, old_log_was_recovered)); if (!logs_found) { @@ -2105,9 +2059,9 @@ my_bool translog_init(const char *directory, /* Used space */ log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); // header page /* Current logs file number in page cache */ - log_descriptor.log_file_num[0]= - open_logfile_by_number_no_cache(1); - if (translog_write_file_header()) + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(1)) == -1 || + translog_write_file_header()) DBUG_RETURN(1); if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, CONTROL_FILE_UPDATE_ONLY_LOGNO)) @@ -2116,44 +2070,29 @@ my_bool translog_init(const char *directory, translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); } - else if (old_log_was_recovered) + else if (old_log_was_recovered || old_flags != flags) { - int buffer_touched= log_descriptor.bc.buffer->file; - if (buffer_touched) - { - struct st_translog_buffer *buffer= log_descriptor.bc.buffer; - /* - We are in initialization so we can use translog_buffer_lock instead - of translog_lock, because there is no other threads which can lock - the loghandler. - */ - if (translog_buffer_lock(buffer) || - translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, - 1) || - translog_buffer_unlock(log_descriptor.bc.buffer) || - translog_buffer_flush(buffer) || translog_buffer_unlock(buffer)) - DBUG_RETURN(1); - } - else - { - /* leave the demaged file untouched */ - log_descriptor.horizon+= LSN_ONE_FILE; - /* header page */ - log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, - TRANSLOG_PAGE_SIZE); - if (translog_create_new_file()) - DBUG_RETURN(1); - /* - Buffer system left untouched after recovery => we should init it - (starting from buffer 0) - */ - translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); - translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); - } + /* leave the damaged file untouched */ + log_descriptor.horizon+= LSN_ONE_FILE; + /* header page */ + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + if (translog_create_new_file()) + DBUG_RETURN(1); + /* + Buffer system left untouched after recovery => we should init it + (starting from buffer 0) + */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); } /* all LSNs that are on disk are flushed */ log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon; + /* + horizon is (potentially) address of the next LSN we need decrease + it to signal that all LSNs before it are flushed + */ log_descriptor.flushed--; /* offset decreased */ log_descriptor.sent_to_file--; /* offset decreased */ @@ -2169,31 +2108,31 @@ my_bool translog_init(const char *directory, buffer_no The buffer to free NOTE - This buffer should be locked; + This buffer should be locked */ static void translog_buffer_destroy(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_destroy"); DBUG_PRINT("enter", - ("Buffer #%u: 0x%lx, file: %u, offset (%lu,0x%lx), size %lu", + ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu", (uint) buffer->buffer_no, (ulong) buffer, - (uint) buffer->file, + buffer->file, (ulong) LSN_FILE_NO(buffer->offset), (ulong) LSN_OFFSET(buffer->offset), (ulong) buffer->size)); DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0); - if (buffer->file) + if (buffer->file != -1) { /* - We ignore error here, because we can't do something about it + We ignore errors here, because we can't do something about it (it is shutting down) */ translog_buffer_flush(buffer); } - DBUG_PRINT("info", ("Unlock mutex 0x%lx", (ulong) &buffer->mutex)); + DBUG_PRINT("info", ("Unlock mutex: 0x%lx", (ulong) &buffer->mutex)); pthread_mutex_unlock(&buffer->mutex); - DBUG_PRINT("info", ("Destroy mutex 0x%lx", (ulong) &buffer->mutex)); + DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex)); pthread_mutex_destroy(&buffer->mutex); DBUG_VOID_RETURN; } @@ -2208,21 +2147,25 @@ static void translog_buffer_destroy(struct st_translog_buffer *buffer) void translog_destroy() { - int i; + uint i; DBUG_ENTER("translog_destroy"); - if (log_descriptor.bc.buffer->file != 0) + if (log_descriptor.bc.buffer->file != -1) translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc); for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) { struct st_translog_buffer *buffer= log_descriptor.buffers + i; + /* + Lock the buffer just for safety, there should not be other + threads running. + */ translog_buffer_lock(buffer); translog_buffer_destroy(buffer); } /* close files */ for (i= 0; i < OPENED_FILES_NUM; i++) { - if (log_descriptor.log_file_num[i]) + if (log_descriptor.log_file_num[i] != -1) translog_close_log_file(log_descriptor.log_file_num[i]); } pthread_mutex_destroy(&log_descriptor.sent_to_file_lock); @@ -2238,8 +2181,8 @@ void translog_destroy() translog_lock() RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_lock() @@ -2248,7 +2191,7 @@ static my_bool translog_lock() DBUG_ENTER("translog_lock"); /* - locking the loghandler mean locking current buffer, but it can change + Locking the loghandler mean locking current buffer, but it can change during locking, so we should check it */ for (;;) @@ -2271,22 +2214,18 @@ static my_bool translog_lock() translog_unlock() RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -#ifndef DBUG_OFF -static my_bool translog_unlock() +static inline my_bool translog_unlock() { DBUG_ENTER("translog_unlock"); translog_buffer_unlock(log_descriptor.bc.buffer); DBUG_RETURN(0); } -#else -#define translog_unlock() \ - translog_buffer_unlock(log_descriptor.bc.buffer); -#endif + /* Start new page @@ -2296,14 +2235,14 @@ static my_bool translog_unlock() horizon \ Position in file and buffer where we are cursor / prev_buffer Buffer which should be flushed will be assigned - here if it is need + here if it is need. This is always set. NOTE handler should be locked RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, @@ -2318,10 +2257,10 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, (LSN_OFFSET(*horizon) > log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE)) { - DBUG_PRINT("info", ("Switch to next buffer, Buffer Size %lu (%lu) => %d, " - "File size %lu max %lu => %d", + DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d " + "File size: %lu max: %lu => %d", (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer), + (ulong) (cursor->ptr - cursor->buffer->buffer), (cursor->ptr + TRANSLOG_PAGE_SIZE > cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER), (ulong) LSN_OFFSET(*horizon), @@ -2335,17 +2274,17 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, TRANSLOG_PAGE_SIZE))) DBUG_RETURN(1); *prev_buffer= buffer; - DBUG_PRINT("info", ("Buffer #%u (0x%lu) have to be flushed", + DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed", (uint) buffer->buffer_no, (ulong) buffer)); } else { - DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu), " - "Buffer Size %lu (%lu)", + DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): " + "Buffer Size: %lu (%lu)", (uint) buffer->buffer_no, (ulong) buffer, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); translog_finish_page(horizon, cursor); translog_new_page_header(horizon, cursor); *prev_buffer= NULL; @@ -2365,39 +2304,39 @@ static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, buffer buffer with data RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor, translog_size_t length, - uchar *buffer) + byte *buffer) { DBUG_ENTER("translog_write_data_on_page"); - DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", - (ulong) length, (uint) cursor->current_page_size)); + DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", + (ulong) length, (uint) cursor->current_page_fill)); DBUG_ASSERT(length > 0); - DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER); - memmove(cursor->ptr, buffer, length); + memcpy(cursor->ptr, buffer, length); cursor->ptr+= length; - *horizon+= length; /* adds offset */ - cursor->current_page_size+= length; + (*horizon)+= length; /* adds offset */ + cursor->current_page_fill+= length; if (!cursor->chaser) cursor->buffer->size+= length; - DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx," - "chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr -cursor->buffer->buffer) == + ((ulong) (cursor->ptr - cursor->buffer->buffer) == cursor->buffer->size)); DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); - DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_RETURN(0); } @@ -2414,8 +2353,8 @@ static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, parts IN/OUT chunk source RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, @@ -2426,80 +2365,82 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, translog_size_t left= length; uint cur= (uint) parts->current; DBUG_ENTER("translog_write_parts_on_page"); - DBUG_PRINT("enter", ("Chunk length: %lu, parts %u of %u. Page size %u, " + DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u " "Buffer size: %lu (%lu)", (ulong) length, (uint) (cur + 1), (uint) parts->parts.elements, - (uint) cursor->current_page_size, + (uint) cursor->current_page_fill, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer))); + (ulong) (cursor->ptr - cursor->buffer->buffer))); DBUG_ASSERT(length > 0); - DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER); do { translog_size_t len; - struct st_translog_part part; - uchar *buff; + struct st_translog_part *part; + byte *buff; DBUG_ASSERT(cur < parts->parts.elements); - get_dynamic(&parts->parts, (gptr) &part, cur); - buff= part.buff; - DBUG_PRINT("info", ("Part %u, Length: %lu, left: %lu", - (uint) (cur + 1), (ulong) part.len, (ulong) left)); + part= dynamic_element(&parts->parts, cur, struct st_translog_part *); + buff= part->buff; + DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu", + (uint) (cur + 1), (ulong) part->len, (ulong) left)); - if (part.len > left) + if (part->len > left) { /* we should write less then the current part */ len= left; - part.len-= len; - part.buff+= len; - if (set_dynamic(&parts->parts, (gptr) &part, cur)) - DBUG_RETURN(1); - DBUG_PRINT("info", ("Set new part %u, Length: %lu", - (uint) (cur + 1), (ulong) part.len)); + part->len-= len; + part->buff+= len; + DBUG_PRINT("info", ("Set new part: %u Length: %lu", + (uint) (cur + 1), (ulong) part->len)); } else { - len= part.len; + len= part->len; cur++; DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len)); } DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u", (ulong) cursor->ptr, (ulong)buff, (uint)len)); - memmove(cursor->ptr, buff, len); + memcpy(cursor->ptr, buff, len); left-= len; cursor->ptr+= len; } while (left); - DBUG_PRINT("info", ("Horizon (%lu,0x%lx) Length %lu(0x%lx)", + DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)", (ulong) LSN_FILE_NO(*horizon), (ulong) LSN_OFFSET(*horizon), (ulong) length, (ulong) length)); parts->current= cur; - *horizon+= length; /* offset increasing */ - cursor->current_page_size+= length; + (*horizon)+= length; /* offset increasing */ + cursor->current_page_fill+= length; if (!cursor->chaser) cursor->buffer->size+= length; DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx " "chaser: %d Size: %lu (%lu) " - "Horizon (%lu,0x%lx) buff offset 0x%lx", + "Horizon: (%lu,0x%lx) buff offset: 0x%lx", (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, - (ulong) (cursor->ptr -cursor->buffer->buffer), + (ulong) (cursor->ptr - cursor->buffer->buffer), (ulong) LSN_FILE_NO(*horizon), (ulong) LSN_OFFSET(*horizon), (ulong) (LSN_OFFSET(cursor->buffer->offset) + cursor->buffer->size))); + /* + TODO: make one check function for the buffer/loghandler + */ + DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr -cursor->buffer->buffer) == + ((ulong) (cursor->ptr - cursor->buffer->buffer) == cursor->buffer->size)); DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == - cursor->current_page_size % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE); + cursor->current_page_fill % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_RETURN(0); } @@ -2511,7 +2452,7 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, SYNOPSIS translog_write_variable_record_1group_header() parts Descriptor of record source parts - type the log record type + type The log record type short_trid Sort transaction ID or 0 if it has no sense header_length Calculated header length of chunk type 0 chunk0_header Buffer for the chunk header writing @@ -2522,18 +2463,20 @@ translog_write_variable_record_1group_header(struct st_translog_parts *parts, enum translog_record_type type, SHORT_TRANSACTION_ID short_trid, uint16 header_length, - uchar *chunk0_header) + byte *chunk0_header) { struct st_translog_part part; - DBUG_ASSERT(parts->current != 0); /* first part is left for - header */ + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ parts->total_record_length+= (part.len= header_length); part.buff= chunk0_header; - *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN); + /* puts chunk type */ + *chunk0_header= (byte) (type | TRANSLOG_CHUNK_LSN); int2store(chunk0_header + 1, short_trid); + /* puts record length */ translog_write_variable_record_1group_code_len(chunk0_header + 3, parts->record_length, header_length); + /* puts 0 as chunk length which indicate 1 group record */ int2store(chunk0_header + header_length - 2, 0); parts->current--; set_dynamic(&parts->parts, (gptr) &part, parts->current); @@ -2548,20 +2491,16 @@ translog_write_variable_record_1group_header(struct st_translog_parts *parts, buffer target buffer */ -#ifndef DBUG_OFF -static void translog_buffer_increase_writers(struct st_translog_buffer *buffer) +static inline void +translog_buffer_increase_writers(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_increase_writers"); buffer->copy_to_buffer_in_progress++; - DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d", + DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d", (uint) buffer->buffer_no, (ulong) buffer, buffer->copy_to_buffer_in_progress)); DBUG_VOID_RETURN; } -#else -#define translog_buffer_increase_writers(B) \ - (B)->copy_to_buffer_in_progress++; -#endif /* @@ -2577,14 +2516,12 @@ static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_decrease_writers"); buffer->copy_to_buffer_in_progress--; - DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d", + DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d", (uint) buffer->buffer_no, (ulong) buffer, buffer->copy_to_buffer_in_progress)); if (buffer->copy_to_buffer_in_progress == 0 && buffer->waiting_filling_buffer.last_thread != NULL) - { wqueue_release_queue(&buffer->waiting_filling_buffer); - } DBUG_VOID_RETURN; } @@ -2599,8 +2536,8 @@ static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer) cursor / RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool @@ -2608,14 +2545,11 @@ translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor) { - struct st_translog_buffer *buffer_to_flush= 0; + struct st_translog_buffer *buffer_to_flush; int rc; - uchar chunk2_header[1]= - { - TRANSLOG_CHUNK_NOHDR - }; - + byte chunk2_header[1]; DBUG_ENTER("translog_write_variable_record_chunk2_page"); + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; rc= translog_page_next(horizon, cursor, &buffer_to_flush); if (buffer_to_flush != NULL) @@ -2629,7 +2563,9 @@ translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, if (rc) DBUG_RETURN(1); + /* Puts chunk type */ translog_write_data_on_page(horizon, cursor, 1, chunk2_header); + /* Puts chunk body */ translog_write_parts_on_page(horizon, cursor, log_descriptor.page_capacity_chunk_2, parts); DBUG_RETURN(0); @@ -2647,8 +2583,8 @@ translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, cursor / RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool @@ -2657,11 +2593,10 @@ translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, TRANSLOG_ADDRESS *horizon, struct st_buffer_cursor *cursor) { - struct st_translog_buffer *buffer_to_flush= 0; + struct st_translog_buffer *buffer_to_flush; struct st_translog_part part; int rc; - uchar chunk3_header[1 + 2]; - + byte chunk3_header[1 + 2]; DBUG_ENTER("translog_write_variable_record_chunk3_page"); rc= translog_page_next(horizon, cursor, &buffer_to_flush); @@ -2682,11 +2617,12 @@ translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, DBUG_RETURN(0); } - DBUG_ASSERT(parts->current != 0); /* first part is left for - header */ + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ parts->total_record_length+= (part.len= 1 + 2); part.buff= chunk3_header; - *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH); + /* Puts chunk type */ + *chunk3_header= (byte) (TRANSLOG_CHUNK_LNGTH); + /* Puts chunk length */ int2store(chunk3_header + 1, length); parts->current--; set_dynamic(&parts->parts, (gptr) &part, parts->current); @@ -2705,16 +2641,16 @@ translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, last_page_data Plus this data on the last page RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) { - translog_size_t last_page_offset= - log_descriptor.page_overhead + last_page_data; + translog_size_t last_page_offset= (log_descriptor.page_overhead + + last_page_data); translog_size_t offset= (TRANSLOG_PAGE_SIZE - - log_descriptor.bc.current_page_size + + log_descriptor.bc.current_page_fill + pages * TRANSLOG_PAGE_SIZE + last_page_offset); translog_size_t buffer_end_offset, file_end_offset, min_offset; DBUG_ENTER("translog_advance_pointer"); @@ -2722,20 +2658,19 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (uint) (TRANSLOG_PAGE_SIZE - - log_descriptor.bc.current_page_size), + log_descriptor.bc.current_page_fill), pages, (uint) log_descriptor.page_overhead, (uint) last_page_data)); for (;;) { - uint8 new_buffer_no= - (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO; + uint8 new_buffer_no; struct st_translog_buffer *new_buffer; struct st_translog_buffer *old_buffer; buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size; - file_end_offset= - log_descriptor.log_file_max_size - LSN_OFFSET(log_descriptor.horizon); - DBUG_PRINT("info", ("offset: %lu, buffer_end_offs: %lu, " + file_end_offset= (log_descriptor.log_file_max_size - + LSN_OFFSET(log_descriptor.horizon)); + DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, " "file_end_offs: %lu", (ulong) offset, (ulong) buffer_end_offset, (ulong) file_end_offset)); @@ -2762,19 +2697,21 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) translog_buffer_lock(new_buffer); translog_wait_for_buffer_free(new_buffer); - min_offset= (buffer_end_offset < file_end_offset ? - buffer_end_offset : file_end_offset); + min_offset= min(buffer_end_offset, file_end_offset); + /* + TODO: check is it ptr or size enough + */ log_descriptor.bc.buffer->size+= min_offset; log_descriptor.bc.ptr+= min_offset; - DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)", + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", (uint) log_descriptor.bc.buffer->buffer_no, (ulong) log_descriptor.bc.buffer, log_descriptor.bc.chaser, (ulong) log_descriptor.bc.buffer->size, (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. buffer->buffer))); - DBUG_ASSERT((ulong) - (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == + DBUG_ASSERT((ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer->buffer) == log_descriptor.bc.buffer->size); DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == log_descriptor.bc.buffer_no); @@ -2785,7 +2722,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) log_descriptor.horizon+= LSN_ONE_FILE; log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, TRANSLOG_PAGE_SIZE); - DBUG_PRINT("info", ("New file %lu", + DBUG_PRINT("info", ("New file: %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon))); if (translog_create_new_file()) { @@ -2799,42 +2736,41 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) } translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); if (translog_buffer_unlock(old_buffer)) - { DBUG_RETURN(1); - } offset-= min_offset; } log_descriptor.bc.ptr+= offset; log_descriptor.bc.buffer->size+= offset; translog_buffer_increase_writers(log_descriptor.bc.buffer); log_descriptor.horizon+= offset; /* offset increasing */ - log_descriptor.bc.current_page_size= last_page_offset; + log_descriptor.bc.current_page_fill= last_page_offset; DBUG_PRINT("info", ("drop write_counter")); log_descriptor.bc.write_counter= 0; log_descriptor.bc.previous_offset= 0; - DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu), " - "offset: %u last page: %u", + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) " + "offset: %u last page: %u", (uint) log_descriptor.bc.buffer->buffer_no, (ulong) log_descriptor.bc.buffer, log_descriptor.bc.chaser, (ulong) log_descriptor.bc.buffer->size, - (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer-> + (ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer-> buffer), (uint) offset, (uint) last_page_offset)); - DBUG_ASSERT(log_descriptor.bc.chaser - || - ((ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) - == log_descriptor.bc.buffer->size)); - DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == - log_descriptor.bc.buffer_no); DBUG_PRINT("info", ("pointer moved to: (%lu, 0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); - DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer-> + DBUG_ASSERT(log_descriptor.bc.chaser || + ((ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer->buffer) + == log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + DBUG_ASSERT((log_descriptor.bc.ptr - log_descriptor.bc.buffer-> buffer) %TRANSLOG_PAGE_SIZE == - log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + log_descriptor.bc.current_page_fill % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); log_descriptor.bc.protected= 0; DBUG_RETURN(0); } @@ -2854,7 +2790,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) */ #define translog_get_current_page_rest() \ - (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size) + (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill) /* Get buffer rest in full pages @@ -2889,21 +2825,20 @@ static translog_size_t translog_get_current_group_size() { /* buffer rest in full pages */ translog_size_t buffer_rest= translog_get_current_buffer_rest(); - DBUG_ENTER("translog_get_current_group_size"); + DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest)); - DBUG_PRINT("info", ("buffer_rest in pages %u", buffer_rest)); buffer_rest*= log_descriptor.page_capacity_chunk_2; /* in case of only half of buffer free we can write this and next buffer */ if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2) { - DBUG_PRINT("info", ("buffer_rest %u -> add %lu", - buffer_rest, + DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu", + (ulong) buffer_rest, (ulong) log_descriptor.buffer_capacity_chunk_2)); buffer_rest+= log_descriptor.buffer_capacity_chunk_2; } - DBUG_PRINT("info", ("buffer_rest %u", buffer_rest)); + DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest)); DBUG_RETURN(buffer_rest); } @@ -2924,8 +2859,8 @@ static translog_size_t translog_get_current_group_size() record log type RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool @@ -2943,23 +2878,20 @@ translog_write_variable_record_1group(LSN *lsn, uint i; translog_size_t record_rest, full_pages, first_page; uint additional_chunk3_page= 0; - uchar chunk0_header[1 + 2 + 5 + 2]; - + byte chunk0_header[1 + 2 + 5 + 2]; DBUG_ENTER("translog_write_variable_record_1group"); *lsn= horizon= log_descriptor.horizon; if (log_record_type_descriptor[type].inwrite_hook && - (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, - lsn, parts)) + (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, lsn, parts)) { + translog_unlock(); DBUG_RETURN(1); } cursor= log_descriptor.bc; cursor.chaser= 1; - /* - Advance pointer To be able unlock the loghandler - */ + /* Advance pointer To be able unlock the loghandler */ first_page= translog_get_current_page_rest(); record_rest= parts->record_length - (first_page - header_length); full_pages= record_rest / log_descriptor.page_capacity_chunk_2; @@ -2973,8 +2905,8 @@ translog_write_variable_record_1group(LSN *lsn, record_rest= 1; } - DBUG_PRINT("info", ("first_page: %u (%u), full_pages: %u (%lu), " - "additional: %u (%u), rest %u = %u", + DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) " + "additional: %u (%u) rest %u = %u", first_page, first_page - header_length, full_pages, (ulong) full_pages * @@ -2984,14 +2916,14 @@ translog_write_variable_record_1group(LSN *lsn, (log_descriptor.page_capacity_chunk_2 - 1), record_rest, parts->record_length)); /* record_rest + 3 is chunk type 3 overhead + record_rest */ - translog_advance_pointer(full_pages + additional_chunk3_page, - (record_rest ? record_rest + 3 : 0)); + rc|= translog_advance_pointer(full_pages + additional_chunk3_page, + (record_rest ? record_rest + 3 : 0)); log_descriptor.bc.buffer->last_lsn= *lsn; rc|= translog_unlock(); /* - check if we switched buffer and need process it (current buffer is + Check if we switched buffer and need process it (current buffer is unlocked already => we will not delay other threads */ if (buffer_to_flush != NULL) @@ -3000,7 +2932,6 @@ translog_write_variable_record_1group(LSN *lsn, rc= translog_buffer_flush(buffer_to_flush); rc|= translog_buffer_unlock(buffer_to_flush); } - if (rc) DBUG_RETURN(1); @@ -3011,7 +2942,7 @@ translog_write_variable_record_1group(LSN *lsn, translog_write_parts_on_page(&horizon, &cursor, first_page, parts); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), @@ -3022,7 +2953,7 @@ translog_write_variable_record_1group(LSN *lsn, if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), @@ -3036,29 +2967,28 @@ translog_write_variable_record_1group(LSN *lsn, page_capacity_chunk_2 - 2, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), (ulong) LSN_OFFSET(horizon))); - DBUG_ASSERT(cursor.current_page_size == TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE); } if (translog_write_variable_record_chunk3_page(parts, record_rest, &horizon, &cursor)) DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), (ulong) LSN_OFFSET(horizon))); - rc= translog_buffer_lock(cursor.buffer); - if (!rc) + if (!(rc= translog_buffer_lock(cursor.buffer))) { /* - check if we wrote something on lst not full page and need to reconstruct + Check if we wrote something on 1:st not full page and need to reconstruct CRC and sector protection */ translog_buffer_decrease_writers(cursor.buffer); @@ -3083,8 +3013,8 @@ translog_write_variable_record_1group(LSN *lsn, record log type RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool @@ -3097,7 +3027,7 @@ translog_write_variable_record_1chunk(LSN *lsn, void *tcb) { int rc; - uchar chunk0_header[1 + 2 + 5 + 2]; + byte chunk0_header[1 + 2 + 5 + 2]; DBUG_ENTER("translog_write_variable_record_1chunk"); translog_write_variable_record_1group_header(parts, type, short_trid, @@ -3105,9 +3035,10 @@ translog_write_variable_record_1chunk(LSN *lsn, *lsn= log_descriptor.horizon; if (log_record_type_descriptor[type].inwrite_hook && - (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, - lsn, parts)) + (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, + lsn, parts)) { + translog_unlock(); DBUG_RETURN(1); } @@ -3139,30 +3070,30 @@ translog_write_variable_record_1chunk(LSN *lsn, translog_put_LSN_diff() base_lsn LSN from which we calculate difference lsn LSN for codding - dst pointer before which result should be written + dst Result will be written to dst[-pack_length] .. dst[-1] NOTE: - to store an LSN in a compact way we will use the following compression: + To store an LSN in a compact way we will use the following compression: - if a log record has LSN1, and it contains the lSN2 as a back reference, - instead of LSN2 we write LSN1-LSN2, encoded as: + If a log record has LSN1, and it contains the lSN2 as a back reference, + Instead of LSN2 we write LSN1-LSN2, encoded as: two bits the number N (see below) 14 bits N bytes - that is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 is stored in the first two bits. RETURN - pointer on coded LSN - NULL - error + # pointer on coded LSN + NULL Error */ -static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) +static byte *translog_put_LSN_diff(LSN base_lsn, LSN lsn, byte *dst) { DBUG_ENTER("translog_put_LSN_diff"); - DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx), val: (0x%lu,0x%lx), dst 0x%lx", + DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx) val: (0x%lu,0x%lx) dst: 0x%lx", (ulong) LSN_FILE_NO(base_lsn), (ulong) LSN_OFFSET(base_lsn), (ulong) LSN_FILE_NO(lsn), @@ -3172,9 +3103,14 @@ static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) uint32 diff; DBUG_ASSERT(base_lsn > lsn); diff= base_lsn - lsn; + DBUG_PRINT("info", ("File is the same. Diff: 0x%lx", (ulong) diff)); if (diff <= 0x3FFF) { dst-= 2; + /* + Note we store this high byte first to ensure that first byte has + 0 in the 3 upper bits. + */ dst[0]= diff >> 8; dst[1]= (diff & 0xFF); } @@ -3204,11 +3140,13 @@ static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) ulonglong base_offset= LSN_OFFSET(base_lsn); DBUG_ASSERT(base_lsn > lsn); diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn); + DBUG_PRINT("info", ("File is different. Diff: 0x%lx", (ulong) diff)); + if (base_offset < LSN_OFFSET(lsn)) { /* take 1 from file offset */ diff--; - base_offset+= 0x100000000LL; + base_offset+= LL(0x100000000); } offset_diff= base_offset - LSN_OFFSET(lsn); if (diff > 0x3f) @@ -3236,71 +3174,74 @@ static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst) dst pointer to buffer where to write 7byte LSN NOTE: - to store an LSN in a compact way we use the following compression: + To store an LSN in a compact way we will use the following compression: If a log record has LSN1, and it contains the lSN2 as a back reference, - instead of LSN2 we write LSN1-LSN2, encoded as: + Instead of LSN2 we write LSN1-LSN2, encoded as: two bits the number N (see below) 14 bits N bytes - That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 - is stored in the first two bits. + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. RETURN pointer to buffer after decoded LSN */ -static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst) +static byte *translog_get_LSN_from_diff(LSN base_lsn, byte *src, byte *dst) { LSN lsn; uint32 diff; uint32 first_byte; + uint32 file_no, rec_offset; uint8 code; DBUG_ENTER("translog_get_LSN_from_diff"); - DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), src: 0x%lx, dst 0x%lx", + DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx) src: 0x%lx dst 0x%lx", (ulong) LSN_FILE_NO(base_lsn), (ulong) LSN_OFFSET(base_lsn), (ulong) src, (ulong) dst)); first_byte= *((uint8*) src); - code= first_byte & 0xC0; - first_byte &= 0x3F; + code= first_byte >> 6; /* Length in 2 upmost bits */ + first_byte&= 0x3F; + src++; /* Skip length + encode */ + file_no= LSN_FILE_NO(base_lsn); /* Assume relative */ + DBUG_PRINT("info", ("code: %u first byte: %lu", + (uint) code, (ulong) first_byte)); switch (code) { - case 0x00: - lsn= base_lsn - ((first_byte << 8) + *((uint8*) (src + 1))); - src+= 2; + case 0: + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src)); break; - case 0x40: - diff= uint2korr(src + 1); - lsn= base_lsn - ((first_byte << 16) + diff); - src+= 3; + case 1: + diff= uint2korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff); break; - case 0x80: - diff= uint3korr(src + 1); - lsn= base_lsn - ((first_byte << 24) + diff); - src+= 4; + case 2: + diff= uint3korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff); break; - case 0xC0: + case 3: { - diff= uint4korr(src + 1); ulonglong base_offset= LSN_OFFSET(base_lsn); + diff= uint4korr(src); if (diff > LSN_OFFSET(base_lsn)) { /* take 1 from file offset */ first_byte++; - base_offset+= 0x100000000LL; + base_offset+= LL(0x100000000); } - lsn= MAKE_LSN(LSN_FILE_NO(base_lsn) - first_byte, - base_offset - diff); - src+= 5; + file_no= LSN_FILE_NO(base_lsn) - first_byte; + rec_offset= base_offset - diff; break; } default: DBUG_ASSERT(0); DBUG_RETURN(NULL); } - lsn7store(dst, lsn); + lsn= MAKE_LSN(file_no, rec_offset); + src+= code + 1; + lsn_store(dst, lsn); DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst)); DBUG_RETURN(src); } @@ -3317,77 +3258,70 @@ static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst) compressed_LSNs buffer which can be used for storing compressed LSN(s) RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, LSN base_lsn, - uint lsns, uchar *compressed_LSNs) + uint lsns, byte *compressed_LSNs) { - struct st_translog_part part; - uint lsns_len= lsns * 7; + struct st_translog_part *part; + uint lsns_len= lsns * LSN_STORE_SIZE; DBUG_ENTER("translog_relative_LSN_encode"); - get_dynamic(&parts->parts, (gptr) &part, parts->current); + part= dynamic_element(&parts->parts, parts->current, + struct st_translog_part *); /* collect all LSN(s) in one chunk if it (they) is (are) divided */ - if (part.len < lsns_len) + if (part->len < lsns_len) { - uint copied= part.len; - DBUG_PRINT("info", ("Using buffer 0x%lx", (ulong) compressed_LSNs)); - memmove(compressed_LSNs, part.buff, part.len); + uint copied= part->len; + DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs)); + memcpy(compressed_LSNs, part->buff, part->len); do { - get_dynamic(&parts->parts, (gptr) &part, parts->current + 1); - if ((part.len + copied) < lsns_len) + struct st_translog_part *next_part; + next_part= dynamic_element(&parts->parts, parts->current + 1, + struct st_translog_part *); + if ((next_part->len + copied) < lsns_len) { - memmove(compressed_LSNs + copied, part.buff, part.len); - copied+= part.len; + memcpy(compressed_LSNs + copied, next_part->buff, next_part->len); + copied+= next_part->len; delete_dynamic_element(&parts->parts, parts->current + 1); } else { uint len= lsns_len - copied; - memmove(compressed_LSNs + copied, part.buff, len); + memcpy(compressed_LSNs + copied, next_part->buff, len); copied= lsns_len; - part.buff+= len; - part.len-= len; - /* - We do not check result of set_dynamic, because we are sure that - it will not grow - */ - set_dynamic(&parts->parts, (gptr) &part, parts->current + 1); + next_part->buff+= len; + next_part->len-= len; } } while (copied < lsns_len); - part.len= lsns_len; - part.buff= compressed_LSNs; + part->len= lsns_len; + part->buff= compressed_LSNs; } { /* Compress */ LSN ref; uint economy; - uchar *ref_ptr= part.buff + lsns_len - 7; - uchar *dst_ptr= part.buff + lsns_len; - uint i; - for (i= 0; i < lsns; i++, ref_ptr-= 7) + byte *ref_ptr= part->buff + lsns_len - LSN_STORE_SIZE; + byte *dst_ptr= part->buff + lsns_len; + for (; ref_ptr >= part->buff ; ref_ptr-= LSN_STORE_SIZE) { - ref= lsn7korr(ref_ptr); + ref= lsn_korr(ref_ptr); if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL) DBUG_RETURN(1); } - economy= (dst_ptr - part.buff); - DBUG_PRINT("info", ("Economy %u", economy)); - part.len-= economy; + /* Note that dst_ptr did grow downward ! */ + economy= (uint) (dst_ptr - part->buff); + DBUG_PRINT("info", ("Economy: %u", economy)); + part->len-= economy; parts->record_length-= economy; parts->total_record_length-= economy; - part.buff= dst_ptr; + part->buff= dst_ptr; } - /* - We do not check result of set_dynamic, because we are sure that - it will not grow - */ - set_dynamic(&parts->parts, (gptr) &part, parts->current); DBUG_RETURN(0); } @@ -3408,8 +3342,8 @@ static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, record log type RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool @@ -3436,19 +3370,18 @@ translog_write_variable_record_mgroup(LSN *lsn, uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1; uint16 last_page_capacity; my_bool new_page_before_chunk0= 1, first_chunk0= 1; - uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1]; - uchar chunk2_header[1]= - { - TRANSLOG_CHUNK_NOHDR - }; + byte chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1]; + byte chunk2_header[1]; uint header_fixed_part= header_length + 2; uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1); - DBUG_ENTER("translog_write_variable_record_mgroup"); + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; + if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor), 10, 10 CALLER_INFO)) { + translog_unlock(); UNRECOVERABLE_ERROR(("init array failed")); DBUG_RETURN(1); } @@ -3471,35 +3404,34 @@ translog_write_variable_record_mgroup(LSN *lsn, cursor.chaser= 1; if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255) { - /* suzeof(uint8) == 256 is max number of chunk in multi-chunks group */ + /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */ full_pages= 255; buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2; } /* group chunks = - full pages + first page (which actually can be full, too. + full pages + first page (which actually can be full, too). But here we assign number of chunks - 1 */ group.num= full_pages; if (insert_dynamic(&groups, (gptr) &group)) { - translog_unlock(); - delete_dynamic(&groups); UNRECOVERABLE_ERROR(("insert into array failed")); - DBUG_RETURN(1); + goto err_unlock; } - DBUG_PRINT("info", ("chunk #%u first_page: %u (%u), full_pages: %lu (%lu), " + DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) " + "full_pages: %lu (%lu) " "Left %lu", groups.elements, first_page, first_page - 1, (ulong) full_pages, - (ulong) full_pages * - log_descriptor.page_capacity_chunk_2, - (ulong)parts->record_length - - (first_page - 1 + buffer_rest) - - done)); - translog_advance_pointer(full_pages, 0); + (ulong) (full_pages * + log_descriptor.page_capacity_chunk_2), + (ulong)(parts->record_length - (first_page - 1 + + buffer_rest) - + done))); + rc|= translog_advance_pointer(full_pages, 0); rc|= translog_unlock(); @@ -3514,15 +3446,14 @@ translog_write_variable_record_mgroup(LSN *lsn, } if (rc) { - delete_dynamic(&groups); UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); - DBUG_RETURN(1); + goto err; } translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " - "Left: %lu", + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), @@ -3533,12 +3464,10 @@ translog_write_variable_record_mgroup(LSN *lsn, for (i= 0; i < full_pages; i++) { if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) - { - delete_dynamic(&groups); - DBUG_RETURN(1); - } + goto err; - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)" + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) " + "local: (%lu,0x%lx) " "Left: %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), @@ -3551,7 +3480,7 @@ translog_write_variable_record_mgroup(LSN *lsn, done+= (first_page - 1 + buffer_rest); - /* TODO: made separate function for following */ + /* TODO: make separate function for following */ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); if (buffer_to_flush != NULL) { @@ -3564,19 +3493,15 @@ translog_write_variable_record_mgroup(LSN *lsn, } if (rc) { - delete_dynamic(&groups); UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); - DBUG_RETURN(1); + goto err; } rc= translog_buffer_lock(cursor.buffer); if (!rc) translog_buffer_decrease_writers(cursor.buffer); rc|= translog_buffer_unlock(cursor.buffer); if (rc) - { - delete_dynamic(&groups); - DBUG_RETURN(1); - } + goto err; translog_lock(); @@ -3590,10 +3515,8 @@ translog_write_variable_record_mgroup(LSN *lsn, group.num= 0; /* 0 because it does not matter */ if (insert_dynamic(&groups, (gptr) &group)) { - delete_dynamic(&groups); - translog_unlock(); UNRECOVERABLE_ERROR(("insert into array failed")); - DBUG_RETURN(1); + goto err_unlock; } record_rest= parts->record_length - done; DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest)); @@ -3643,34 +3566,37 @@ translog_write_variable_record_mgroup(LSN *lsn, record_rest + header_fixed_part + (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1)) chunk0_pages++; - DBUG_PRINT("info", ("chunk0_pages %u, groups %u, groups per full page %u, " - "Group on last page %u", + DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u " + "Group on last page: %u", chunk0_pages, groups.elements, groups_per_page, (groups.elements - ((page_capacity - header_fixed_part) / (7 + 1)) * (chunk0_pages - 1)))); - DBUG_PRINT("info", ("first_page: %u, chunk2 %u full_pages: %u (%lu), " - "chunk3 %u (%u), rest %u", + DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) " + "chunk3: %u (%u) rest: %u", first_page, chunk2_page, full_pages, (ulong) full_pages * log_descriptor.page_capacity_chunk_2, chunk3_pages, (uint) chunk3_size, (uint) record_rest)); - translog_advance_pointer(full_pages + chunk3_pages + - (chunk0_pages - 1), - record_rest + header_fixed_part + - (groups.elements - - ((page_capacity - header_fixed_part) / (7 + 1)) * - (chunk0_pages - 1)) * (7 + 1)); - translog_unlock(); + rc= translog_advance_pointer(full_pages + chunk3_pages + + (chunk0_pages - 1), + record_rest + header_fixed_part + + (groups.elements - + ((page_capacity - + header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)) * (7 + 1)); + rc|= translog_unlock(); + if (rc) + goto err; if (chunk2_page) { DBUG_PRINT("info", ("chunk 2 to finish first page")); translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " "Left: %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), @@ -3683,19 +3609,19 @@ translog_write_variable_record_mgroup(LSN *lsn, { DBUG_PRINT("info", ("chunk 3")); DBUG_ASSERT(full_pages == 0); - uchar chunk3_header[3]; + byte chunk3_header[3]; + chunk3_pages= 0; chunk3_header[0]= TRANSLOG_CHUNK_LNGTH; int2store(chunk3_header + 1, chunk3_size); translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header); translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts); - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " "Left: %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), (ulong) LSN_OFFSET(horizon), (ulong) (parts->record_length - chunk3_size - done))); - chunk3_pages= 0; } else { @@ -3707,12 +3633,9 @@ translog_write_variable_record_mgroup(LSN *lsn, { DBUG_ASSERT(chunk2_page != 0); if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) - { - delete_dynamic(&groups); - DBUG_RETURN(1); - } + goto err; - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx) " + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " "Left: %lu", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), @@ -3727,18 +3650,15 @@ translog_write_variable_record_mgroup(LSN *lsn, translog_write_variable_record_chunk3_page(parts, chunk3_size, &horizon, &cursor)) - { - delete_dynamic(&groups); - DBUG_RETURN(1); - } - DBUG_PRINT("info", ("absolute horizon (%lu,0x%lx), local (%lu,0x%lx)", + goto err; + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_FILE_NO(horizon), (ulong) LSN_OFFSET(horizon))); - *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN); + *chunk0_header= (byte) (type |TRANSLOG_CHUNK_LSN); int2store(chunk0_header + 1, short_trid); translog_write_variable_record_1group_code_len(chunk0_header + 3, parts->record_length, @@ -3760,24 +3680,20 @@ translog_write_variable_record_mgroup(LSN *lsn, } if (rc) { - delete_dynamic(&groups); UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); - DBUG_RETURN(1); + goto err; } } new_page_before_chunk0= 1; if (first_chunk0) { + first_chunk0= 0; *lsn= horizon; if (log_record_type_descriptor[type].inwrite_hook && (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, lsn, parts)) - { - DBUG_RETURN(1); - } - - first_chunk0= 0; + goto err; } /* @@ -3787,7 +3703,7 @@ translog_write_variable_record_mgroup(LSN *lsn, */ limit= (groups_per_page < groups.elements - curr_group ? groups_per_page : groups.elements - curr_group); - DBUG_PRINT("info", ("Groups: %u curr %u, limit %u", + DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u", (uint) groups.elements, (uint) curr_group, (uint) limit)); @@ -3810,9 +3726,11 @@ translog_write_variable_record_mgroup(LSN *lsn, chunk0_header); for (i= curr_group; i < limit + curr_group; i++) { - get_dynamic(&groups, (gptr) &group, i); - lsn7store(group_desc, group.addr); - group_desc[7]= group.num; + struct st_translog_group_descriptor *grp_ptr; + grp_ptr= dynamic_element(&groups, i, + struct st_translog_group_descriptor *); + lsn_store(group_desc, grp_ptr->addr); + group_desc[7]= grp_ptr->num; translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc); } @@ -3831,6 +3749,12 @@ translog_write_variable_record_mgroup(LSN *lsn, delete_dynamic(&groups); DBUG_RETURN(rc); + +err_unlock: + translog_unlock(); +err: + delete_dynamic(&groups); + DBUG_RETURN(1); } @@ -3847,8 +3771,8 @@ translog_write_variable_record_mgroup(LSN *lsn, record log type RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_write_variable_record(LSN *lsn, @@ -3862,17 +3786,17 @@ static my_bool translog_write_variable_record(LSN *lsn, translog_variable_record_length_bytes(parts->record_length); ulong buffer_rest; uint page_rest; - uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per - record is 2 */ + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[2 * LSN_STORE_SIZE]; DBUG_ENTER("translog_write_variable_record"); translog_lock(); - DBUG_PRINT("info", ("horizon (%lu,0x%lx)", + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); - page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size; - DBUG_PRINT("info", ("header length %u, page_rest: %u", + page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + DBUG_PRINT("info", ("header length: %u page_rest: %u", header_length1, page_rest)); /* @@ -3883,12 +3807,13 @@ static my_bool translog_write_variable_record(LSN *lsn, (header_length1 + log_record_type_descriptor[type].read_header_len)) { DBUG_PRINT("info", - ("Next page, size: %u, header: %u + %u", - log_descriptor.bc.current_page_size, + ("Next page, size: %u header: %u + %u", + log_descriptor.bc.current_page_fill, header_length1, log_record_type_descriptor[type].read_header_len)); translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, &buffer_to_flush); + /* Chunk 2 header is 1 byte, so full page capacity will be one byte more */ page_rest= log_descriptor.page_capacity_chunk_2 + 1; DBUG_PRINT("info", ("page_rest: %u", page_rest)); } @@ -3897,26 +3822,30 @@ static my_bool translog_write_variable_record(LSN *lsn, To minimize compressed size we will compress always relative to very first chunk address (log_descriptor.horizon for now) */ - if (log_record_type_descriptor[type].compresed_LSN > 0) + if (log_record_type_descriptor[type].compressed_LSN > 0) { if (translog_relative_LSN_encode(parts, log_descriptor.horizon, log_record_type_descriptor[type]. - compresed_LSN, compressed_LSNs)) + compressed_LSN, compressed_LSNs)) { - int rc= translog_unlock(); + translog_unlock(); if (buffer_to_flush != NULL) { - if (!rc) - rc= translog_buffer_flush(buffer_to_flush); - rc|= translog_buffer_unlock(buffer_to_flush); + /* + It is just try to finish log in nice way in case of error, so we + do not check result of the following functions, because we are + going return error state in any case + */ + translog_buffer_flush(buffer_to_flush); + translog_buffer_unlock(buffer_to_flush); } DBUG_RETURN(1); } /* recalculate header length after compression */ header_length1= 1 + 2 + 2 + translog_variable_record_length_bytes(parts->record_length); - DBUG_PRINT("info", ("after compressing LSN(s) header length %u, " - "record length %lu", + DBUG_PRINT("info", ("after compressing LSN(s) header length: %u " + "record length: %lu", header_length1, (ulong)parts->record_length)); } @@ -3943,7 +3872,6 @@ static my_bool translog_write_variable_record(LSN *lsn, parts, buffer_to_flush, header_length1, buffer_rest, tcb)); - DBUG_RETURN(0); } @@ -3960,8 +3888,8 @@ static my_bool translog_write_variable_record(LSN *lsn, record log type RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_write_fixed_record(LSN *lsn, @@ -3971,9 +3899,9 @@ static my_bool translog_write_fixed_record(LSN *lsn, void *tcb) { struct st_translog_buffer *buffer_to_flush= NULL; - uchar chunk1_header[1 + 2]; - uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per - record is 2 */ + byte chunk1_header[1 + 2]; + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[2 * LSN_STORE_SIZE]; struct st_translog_part part; int rc; DBUG_ENTER("translog_write_fixed_record"); @@ -3984,32 +3912,32 @@ static my_bool translog_write_fixed_record(LSN *lsn, (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH && (parts->record_length - - log_record_type_descriptor[type].compresed_LSN * 2) <= + log_record_type_descriptor[type].compressed_LSN * 2) <= log_record_type_descriptor[type].fixed_length)); translog_lock(); - DBUG_PRINT("info", ("horizon (%lu,0x%lx)", + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); - DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); DBUG_PRINT("info", - ("Page size: %u, record %u, next cond %d", - log_descriptor.bc.current_page_size, + ("Page size: %u record: %u next cond: %d", + log_descriptor.bc.current_page_fill, (parts->record_length - - log_record_type_descriptor[type].compresed_LSN * 2 + 3), - ((((uint) log_descriptor.bc.current_page_size) + + log_record_type_descriptor[type].compressed_LSN * 2 + 3), + ((((uint) log_descriptor.bc.current_page_fill) + (parts->record_length - - log_record_type_descriptor[type].compresed_LSN * 2 + 3)) > + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > TRANSLOG_PAGE_SIZE))); /* check that there is enough place on current page: (log_record_type_descriptor[type].fixed_length - economized on compressed LSNs) bytes */ - if ((((uint) log_descriptor.bc.current_page_size) + + if ((((uint) log_descriptor.bc.current_page_fill) + (parts->record_length - - log_record_type_descriptor[type].compresed_LSN * 2 + 3)) > + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > TRANSLOG_PAGE_SIZE) { DBUG_PRINT("info", ("Next page")); @@ -4022,17 +3950,17 @@ static my_bool translog_write_fixed_record(LSN *lsn, (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, lsn, parts)) { - DBUG_RETURN(1); + rc= 1; + goto err; } - /* compress LSNs */ if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH) { - DBUG_ASSERT(log_record_type_descriptor[type].compresed_LSN > 0); + DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0); if (translog_relative_LSN_encode(parts, *lsn, log_record_type_descriptor[type]. - compresed_LSN, compressed_LSNs)) + compressed_LSN, compressed_LSNs)) { rc= 1; goto err; @@ -4040,14 +3968,13 @@ static my_bool translog_write_fixed_record(LSN *lsn, } /* - Write the whole record at once (we sure that there is enough place on - the destination page + Write the whole record at once (we know that there is enough place on + the destination page) */ - DBUG_ASSERT(parts->current != 0); /* first part is left for - header */ + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ parts->total_record_length+= (part.len= 1 + 2); part.buff= chunk1_header; - *chunk1_header= (uchar) (type |TRANSLOG_CHUNK_FIXED); + *chunk1_header= (byte) (type | TRANSLOG_CHUNK_FIXED); int2store(chunk1_header + 1, short_trid); parts->current--; set_dynamic(&parts->parts, (gptr) &part, parts->current); @@ -4057,6 +3984,7 @@ static my_bool translog_write_fixed_record(LSN *lsn, parts->total_record_length, parts); log_descriptor.bc.buffer->last_lsn= *lsn; + err: rc|= translog_unlock(); @@ -4090,8 +4018,8 @@ err: 0 sign of the end of parts RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ my_bool translog_write_record(LSN *lsn, @@ -4099,13 +4027,15 @@ my_bool translog_write_record(LSN *lsn, SHORT_TRANSACTION_ID short_trid, void *tcb, translog_size_t part1_length, - uchar *part1_buff, ...) + byte *part1_buff, ...) { struct st_translog_parts parts; + struct st_translog_part part; va_list pvar; int rc; DBUG_ENTER("translog_write_record"); - DBUG_PRINT("enter", ("type %u, ShortTrID %u", (uint) type, (uint)short_trid)); + DBUG_PRINT("enter", ("type: %u ShortTrID: %u", + (uint) type, (uint)short_trid)); /* move information about parts into dynamic array */ if (init_dynamic_array(&parts.parts, sizeof(struct st_translog_part), @@ -4114,58 +4044,55 @@ my_bool translog_write_record(LSN *lsn, UNRECOVERABLE_ERROR(("init array failed")); DBUG_RETURN(1); } + + /* reserve place for header */ + parts.current= 1; + part.len= 0; + part.buff= 0; + if (insert_dynamic(&parts.parts, (gptr) &part)) { - struct st_translog_part part; + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } - /* reserve place for header */ - parts.current= 1; - part.len= 0; - part.buff= 0; + parts.record_length= part.len= part1_length; + part.buff= part1_buff; + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("record length: %lu %lu ...", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* count record length */ + va_start(pvar, part1_buff); + for (;;) + { + part.len= va_arg(pvar, translog_size_t); + if (part.len == 0) + break; + parts.record_length+= part.len; + part.buff= va_arg(pvar, byte*); if (insert_dynamic(&parts.parts, (gptr) &part)) { UNRECOVERABLE_ERROR(("insert into array failed")); DBUG_RETURN(1); } - - parts.record_length= part.len= part1_length; - part.buff= part1_buff; - if (insert_dynamic(&parts.parts, (gptr) &part)) - { - UNRECOVERABLE_ERROR(("insert into array failed")); - DBUG_RETURN(1); - } - DBUG_PRINT("info", ("record length: %lu, %lu ...", + DBUG_PRINT("info", ("record length: %lu %lu ...", (ulong) parts.record_length, (ulong) parts.total_record_length)); - - /* count record length */ - va_start(pvar, part1_buff); - for (;;) - { - part.len= va_arg(pvar, translog_size_t); - if (part.len == 0) - break; - parts.record_length+= part.len; - part.buff= va_arg(pvar, uchar*); - if (insert_dynamic(&parts.parts, (gptr) &part)) - { - UNRECOVERABLE_ERROR(("insert into array failed")); - DBUG_RETURN(1); - } - DBUG_PRINT("info", ("record length: %lu, %lu ...", - (ulong) parts.record_length, - (ulong) parts.total_record_length)); - } - va_end(pvar); - - /* - start total_record_length from record_length then overhead will - be add - */ - parts.total_record_length= parts.record_length; } va_end(pvar); - DBUG_PRINT("info", ("record length: %lu, %lu", + + /* + Start total_record_length from record_length then overhead will + be add + */ + parts.total_record_length= parts.record_length; + va_end(pvar); + DBUG_PRINT("info", ("record length: %lu %lu", (ulong) parts.record_length, (ulong) parts.total_record_length)); @@ -4174,19 +4101,14 @@ my_bool translog_write_record(LSN *lsn, (*log_record_type_descriptor[type].prewrite_hook) (type, tcb, &parts)))) { - switch (log_record_type_descriptor[type].class) - { + switch (log_record_type_descriptor[type].class) { case LOGRECTYPE_VARIABLE_LENGTH: - { rc= translog_write_variable_record(lsn, type, short_trid, &parts, tcb); break; - } case LOGRECTYPE_PSEUDOFIXEDLENGTH: case LOGRECTYPE_FIXEDLENGTH: - { rc= translog_write_fixed_record(lsn, type, short_trid, &parts, tcb); break; - } case LOGRECTYPE_NOT_ALLOWED: default: DBUG_ASSERT(0); @@ -4213,11 +4135,11 @@ my_bool translog_write_record(LSN *lsn, position in sources after decoded LSN(s) */ -static uchar *translog_relative_LSN_decode(LSN base_lsn, - uchar *src, uchar *dst, uint lsns) +static byte *translog_relative_LSN_decode(LSN base_lsn, + byte *src, byte *dst, uint lsns) { uint i; - for (i= 0; i < lsns; i++, dst+= 7) + for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE) { src= translog_get_LSN_from_diff(base_lsn, src, dst); } @@ -4235,21 +4157,21 @@ static uchar *translog_relative_LSN_decode(LSN base_lsn, buff Buffer to be filled with header data RETURN - 0 - error - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header */ -translog_size_t translog_fixed_length_header(uchar *page, +translog_size_t translog_fixed_length_header(byte *page, translog_size_t page_offset, TRANSLOG_HEADER_BUFFER *buff) { struct st_log_record_type_descriptor *desc= log_record_type_descriptor + buff->type; - uchar *src= page + page_offset + 3; - uchar *dst= buff->header; - uchar *start= src; - uint lsns= desc->compresed_LSN; + byte *src= page + page_offset + 3; + byte *dst= buff->header; + byte *start= src; + uint lsns= desc->compressed_LSN; uint length= desc->fixed_length + (lsns * 2); DBUG_ENTER("translog_fixed_length_header"); @@ -4260,7 +4182,7 @@ translog_size_t translog_fixed_length_header(uchar *page, { DBUG_ASSERT(lsns > 0); src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns); - lsns*= 7; + lsns*= LSN_STORE_SIZE; dst+= lsns; length-= lsns; buff->compressed_LSN_economy= (uint16) (lsns - (src - start)); @@ -4268,7 +4190,7 @@ translog_size_t translog_fixed_length_header(uchar *page, else buff->compressed_LSN_economy= 0; - memmove(dst, src, length); + memcpy(dst, src, length); buff->non_header_data_start_offset= page_offset + ((src + length) - (page + page_offset)); buff->non_header_data_len= 0; @@ -4320,23 +4242,21 @@ static void translog_scanner_set_horizon(struct st_translog_scanner_data scanner Information about current chunk during scanning RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -static my_bool translog_scanner_set_last_page(struct st_translog_scanner_data +static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner) { my_bool page_ok; scanner->last_file_page= scanner->page_addr; - if (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)) - return (1); - return (0); + return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)); } /* - Init scanner + Initialize reader scanner SYNOPSIS translog_init_scanner() @@ -4346,29 +4266,30 @@ static my_bool translog_scanner_set_last_page(struct st_translog_scanner_data scanner scanner which have to be inited RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -static my_bool translog_init_scanner(LSN lsn, - my_bool fixed_horizon, - struct st_translog_scanner_data *scanner) -{ - TRANSLOG_VALIDATOR_DATA data= - { - &scanner->page_addr, 0 - }; +my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner) +{ + TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_init_scanner"); DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn)); DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; scanner->fixed_horizon= fixed_horizon; translog_scanner_set_horizon(scanner); - DBUG_PRINT("info", ("Horizon: (0x%lu,0x%lx)", + DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)", (ulong) LSN_FILE_NO(scanner->horizon), (ulong) LSN_OFFSET(scanner->horizon))); @@ -4395,14 +4316,14 @@ static my_bool translog_init_scanner(LSN lsn, scanner Information about current chunk during scanning RETURN - 1 - End of the Log - 0 - OK + 1 End of the Log + 0 OK */ -static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner) +static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner) { DBUG_ENTER("translog_scanner_eol"); DBUG_PRINT("enter", - ("Horizon: (%lu, 0x%lx), Current: (%lu, 0x%lx+0x%x=0x%lx)", + ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)", (ulong) LSN_FILE_NO(scanner->horizon), (ulong) LSN_OFFSET(scanner->horizon), (ulong) LSN_FILE_NO(scanner->page_addr), @@ -4438,10 +4359,10 @@ static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner) scanner Information about current chunk during scanning RETURN - 1 - End of the Page - 0 - OK + 1 End of the Page + 0 OK */ -static my_bool translog_scanner_eop(struct st_translog_scanner_data *scanner) +static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner) { DBUG_ENTER("translog_scanner_eop"); DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE || @@ -4458,16 +4379,16 @@ static my_bool translog_scanner_eop(struct st_translog_scanner_data *scanner) scanner Information about current chunk during scanning RETURN - 1 - End of the File - 0 - OK + 1 End of the File + 0 OK */ -static my_bool translog_scanner_eof(struct st_translog_scanner_data *scanner) +static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner) { DBUG_ENTER("translog_scanner_eof"); DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->last_file_page)); - DBUG_PRINT("enter", ("curr Page 0x%lx, last page 0x%lx, " - "normal EOF %d", + DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx " + "normal EOF: %d", (ulong) LSN_OFFSET(scanner->page_addr), (ulong) LSN_OFFSET(scanner->last_file_page), LSN_OFFSET(scanner->page_addr) == @@ -4489,16 +4410,19 @@ static my_bool translog_scanner_eof(struct st_translog_scanner_data *scanner) scanner Information about current chunk during scanning RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ -static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) +static my_bool +translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) { + uint16 len; + TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_get_next_chunk"); - uint16 len= translog_get_total_chunk_length(scanner->page, - scanner->page_offset); - if (len == 0) + + if ((len= translog_get_total_chunk_length(scanner->page, + scanner->page_offset)) == 0) DBUG_RETURN(1); scanner->page_offset+= len; @@ -4512,7 +4436,7 @@ static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) { if (translog_scanner_eof(scanner)) { - DBUG_PRINT("info", ("horizon (%lu,0x%lx) pageaddr (%lu,0x%lx)", + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)", (ulong) LSN_FILE_NO(scanner->horizon), (ulong) LSN_OFFSET(scanner->horizon), (ulong) LSN_FILE_NO(scanner->page_addr), @@ -4530,14 +4454,12 @@ static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) { scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */ } - { - TRANSLOG_VALIDATOR_DATA data= - { - &scanner->page_addr, 0 - }; - if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) - DBUG_RETURN(1); - } + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + scanner->page_offset= translog_get_first_chunk_offset(scanner->page); if (translog_scanner_eol(scanner)) { @@ -4545,7 +4467,7 @@ static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) scanner->page_offset= 0; DBUG_RETURN(0); } - DBUG_ASSERT(scanner->page[scanner->page_offset] != 0); + DBUG_ASSERT(scanner->page[scanner->page_offset]); } DBUG_RETURN(0); } @@ -4564,35 +4486,34 @@ static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner) it differ from LSN page RETURN - 0 - error - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header */ -translog_size_t translog_variable_length_header(uchar *page, +translog_size_t translog_variable_length_header(byte *page, translog_size_t page_offset, TRANSLOG_HEADER_BUFFER *buff, - struct - st_translog_scanner_data + TRANSLOG_SCANNER_DATA *scanner) { - struct st_log_record_type_descriptor *desc= - log_record_type_descriptor + buff->type; - uchar *src= page + page_offset + 1 + 2; - uchar *dst= buff->header; + struct st_log_record_type_descriptor *desc= (log_record_type_descriptor + + buff->type); + byte *src= page + page_offset + 1 + 2; + byte *dst= buff->header; LSN base_lsn; - uint lsns= desc->compresed_LSN; + uint lsns= desc->compressed_LSN; uint16 chunk_len; uint16 length= desc->read_header_len + (lsns * 2); uint16 buffer_length= length; uint16 body_len; - struct st_translog_scanner_data internal_scanner; + TRANSLOG_SCANNER_DATA internal_scanner; DBUG_ENTER("translog_variable_length_header"); buff->record_length= translog_variable_record_1group_decode_len(&src); chunk_len= uint2korr(src); - DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, length %u, bufflen %u", + DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u", (ulong) buff->record_length, (uint) chunk_len, (uint) length, (uint) buffer_length)); if (chunk_len == 0) @@ -4603,8 +4524,7 @@ translog_size_t translog_variable_length_header(uchar *page, page_rest= TRANSLOG_PAGE_SIZE - (src - page); base_lsn= buff->lsn; - body_len= (page_rest < buff->record_length ? - page_rest : buff->record_length); + body_len= min(page_rest, buff->record_length); } else { @@ -4614,9 +4534,9 @@ translog_size_t translog_variable_length_header(uchar *page, DBUG_PRINT("info", ("multi-group")); grp_no= buff->groups_no= uint2korr(src + 2); - if ((buff->groups= - (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * buff->groups_no, - MYF(0))) == 0) + if (!(buff->groups= + (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no, + MYF(0)))) DBUG_RETURN(0); DBUG_PRINT("info", ("Groups: %u", (uint) grp_no)); src+= (2 + 2); @@ -4627,20 +4547,20 @@ translog_size_t translog_variable_length_header(uchar *page, for (;;) { - uint i; - uint read= grp_no; + uint i, read= grp_no; buff->chunk0_pages++; if (page_rest < grp_no * (7 + 1)) read= page_rest / (7 + 1); - DBUG_PRINT("info", ("Read chunk0 page#%u read %u left %u start from %u", + DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u " + "start from: %u", buff->chunk0_pages, read, grp_no, curr)); for (i= 0; i < read; i++, curr++) { DBUG_ASSERT(curr < buff->groups_no); - buff->groups[curr].addr= lsn7korr(src + i * (7 + 1)); + buff->groups[curr].addr= lsn_korr(src + i * (7 + 1)); buff->groups[curr].num= src[i * (7 + 1) + 7]; - DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks %u", + DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u", curr, (ulong) LSN_FILE_NO(buff->groups[curr].addr), (ulong) LSN_OFFSET(buff->groups[curr].addr), @@ -4653,16 +4573,16 @@ translog_size_t translog_variable_length_header(uchar *page, { buff->chunk0_data_addr= scanner->page_addr; buff->chunk0_data_addr+= (page_offset + header_to_skip + - i * (7 + 1)); /* offset increased */ + read * (7 + 1)); /* offset increased */ } else { buff->chunk0_data_addr= buff->lsn; /* offset increased */ - buff->chunk0_data_addr+= (header_to_skip + i * (7 + 1)); + buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1)); } - buff->chunk0_data_len= chunk_len - 2 - i * (7 + 1); - DBUG_PRINT("info", ("Data address (%lu,0x%lx), len: %u", + buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1); + DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u", (ulong) LSN_FILE_NO(buff->chunk0_data_addr), (ulong) LSN_OFFSET(buff->chunk0_data_addr), buff->chunk0_data_len)); @@ -4670,7 +4590,7 @@ translog_size_t translog_variable_length_header(uchar *page, } if (scanner == NULL) { - DBUG_PRINT("info", ("use internal scanner for header reding")); + DBUG_PRINT("info", ("use internal scanner for header reading")); scanner= &internal_scanner; translog_init_scanner(buff->lsn, 1, scanner); } @@ -4700,15 +4620,15 @@ translog_size_t translog_variable_length_header(uchar *page, } if (lsns) { - uchar *start= src; + byte *start= src; src= translog_relative_LSN_decode(base_lsn, src, dst, lsns); - lsns*= 7; + lsns*= LSN_STORE_SIZE; dst+= lsns; length-= lsns; buff->record_length+= (buff->compressed_LSN_economy= (uint16) (lsns - (src - start))); - DBUG_PRINT("info", ("lsns: %u, length %u, economy %u, new length %lu", - lsns / 7, (uint) length, + DBUG_PRINT("info", ("lsns: %u length: %u economy: %u new length: %lu", + lsns / LSN_STORE_SIZE, (uint) length, (uint) buff->compressed_LSN_economy, (ulong) buff->record_length)); body_len-= (src - start); @@ -4718,10 +4638,10 @@ translog_size_t translog_variable_length_header(uchar *page, DBUG_ASSERT(body_len >= length); body_len-= length; - memmove(dst, src, length); + memcpy(dst, src, length); buff->non_header_data_start_offset= src + length - page; buff->non_header_data_len= body_len; - DBUG_PRINT("info", ("non_header_data_start_offset %u len %u buffer %u", + DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u", buff->non_header_data_start_offset, buff->non_header_data_len, buffer_length)); DBUG_RETURN(buffer_length); @@ -4736,17 +4656,16 @@ translog_size_t translog_variable_length_header(uchar *page, page page content buffer page_offset offset of the chunk in the page buff destination buffer - scanner if it is need this scanner will be moved to the + scanner If this is set the scanner will be moved to the record header page (differ from LSN page in case of - multi-group records + multi-group records) */ translog_size_t -translog_read_record_header_from_buffer(uchar *page, +translog_read_record_header_from_buffer(byte *page, uint16 page_offset, TRANSLOG_HEADER_BUFFER *buff, - struct - st_translog_scanner_data *scanner) + TRANSLOG_SCANNER_DATA *scanner) { DBUG_ENTER("translog_read_record_header_from_buffer"); DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) == @@ -4760,8 +4679,7 @@ translog_read_record_header_from_buffer(uchar *page, (ulong) LSN_FILE_NO(buff->lsn), (ulong) LSN_OFFSET(buff->lsn))); /* Read required bytes from the header and call hook */ - switch (log_record_type_descriptor[buff->type].class) - { + switch (log_record_type_descriptor[buff->type].class) { case LOGRECTYPE_VARIABLE_LENGTH: DBUG_RETURN(translog_variable_length_header(page, page_offset, buff, scanner)); @@ -4771,7 +4689,7 @@ translog_read_record_header_from_buffer(uchar *page, default: DBUG_ASSERT(0); } - DBUG_RETURN(0); + DBUG_RETURN(0); /* purecov: deadcode */ } @@ -4794,17 +4712,18 @@ translog_read_record_header_from_buffer(uchar *page, length) RETURN - 0 - error - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header */ translog_size_t translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff) { - uchar buffer[TRANSLOG_PAGE_SIZE], *page; + byte buffer[TRANSLOG_PAGE_SIZE], *page; translog_size_t page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; - + TRANSLOG_ADDRESS addr; + TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_read_record_header"); DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn))); @@ -4812,16 +4731,12 @@ translog_size_t translog_read_record_header(LSN lsn, buff->lsn= lsn; buff->groups_no= 0; - { - TRANSLOG_ADDRESS addr= lsn; - TRANSLOG_VALIDATOR_DATA data= - { - &addr, 0 - }; - addr-= page_offset; /* offset decreasing */ - if ((page= translog_get_page(&data, buffer)) == NULL) - DBUG_RETURN(0); - } + data.addr= &addr; + data.was_recovered= 0; + addr= lsn; + addr-= page_offset; /* offset decreasing */ + if (!(page= translog_get_page(&data, buffer))) + DBUG_RETURN(0); DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset, buff, 0)); @@ -4846,20 +4761,20 @@ translog_size_t translog_read_record_header(LSN lsn, length) RETURN - 0 - error - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header */ translog_size_t -translog_read_record_header_scan(struct st_translog_scanner_data +translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner, TRANSLOG_HEADER_BUFFER *buff, my_bool move_scanner) { DBUG_ENTER("translog_read_record_header_scan"); - DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " - "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", + DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d", (ulong) LSN_FILE_NO(scanner->page_addr), (ulong) LSN_OFFSET(scanner->page_addr), (ulong) LSN_FILE_NO(scanner->horizon), @@ -4885,59 +4800,37 @@ translog_read_record_header_scan(struct st_translog_scanner_data SYNOPSIS translog_read_next_record_header() - lsn log record serial number (address of the record) - previous to the record which will be read - If LSN present scanner will be initialized from it, - do not use LSN after initialization for fast scanning. - buff log record header buffer - fixed_horizon true if it is OK do not read records which was written - after scanning beginning scanner data for scanning if lsn is NULL scanner data will be used for continue scanning. The scanner can be NULL. + buff log record header buffer NOTE - - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed - correctly (lsn in buffer will be replaced by next record, but initial - lsn will be read correctly). - it is like translog_read_record_header, but read next record, so see its NOTES. - in case of end of the log buff->lsn will be set to - (CONTROL_FILE_IMPOSSIBLE_FILENO, 0) + (CONTROL_FILE_IMPOSSIBLE_LSN) RETURN - 0 - error - TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header + 0 error + TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 End of the log + # number of bytes in + TRANSLOG_HEADER_BUFFER::header + where stored decoded + part of the header */ -translog_size_t translog_read_next_record_header(LSN lsn, - TRANSLOG_HEADER_BUFFER *buff, - my_bool fixed_horizon, - struct - st_translog_scanner_data - *scanner) +translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff) { - struct st_translog_scanner_data internal_scanner; uint8 chunk_type; buff->groups_no= 0; /* to be sure that we will free it right */ DBUG_ENTER("translog_read_next_record_header"); DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner)); - if (scanner == NULL) - { - DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); - scanner= &internal_scanner; - } - if (lsn) - { - if (translog_init_scanner(lsn, fixed_horizon, scanner)) - DBUG_RETURN(0); - DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); - } - DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " - "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", + DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", (ulong) LSN_FILE_NO(scanner->page_addr), (ulong) LSN_OFFSET(scanner->page_addr), (ulong) LSN_FILE_NO(scanner->horizon), @@ -4952,7 +4845,7 @@ translog_size_t translog_read_next_record_header(LSN lsn, if (translog_get_next_chunk(scanner)) DBUG_RETURN(0); chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE; - DBUG_PRINT("info", ("type %x, byte %x", (uint) chunk_type, + DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type, (uint) scanner->page[scanner->page_offset])); } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type != TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0); @@ -4961,7 +4854,8 @@ translog_size_t translog_read_next_record_header(LSN lsn, { /* Last record was read */ buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); /* just it is not error */ + /* Return 'end of log' marker */ + DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); } DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0)); } @@ -4976,8 +4870,8 @@ translog_size_t translog_read_next_record_header(LSN lsn, data data cursor RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data @@ -4986,7 +4880,6 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data translog_size_t new_current_offset= data->current_offset + data->chunk_size; uint16 chunk_header_len, chunk_len; uint8 type; - DBUG_ENTER("translog_record_read_next_chunk"); if (data->eor) @@ -5002,7 +4895,7 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data /* Goto next group */ data->current_group++; data->current_chunk= 0; - DBUG_PRINT("info", ("skip to group #%u", data->current_group)); + DBUG_PRINT("info", ("skip to group: #%u", data->current_group)); translog_init_scanner(data->header.groups[data->current_group].addr, 1, &data->scanner); } @@ -5017,7 +4910,7 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no) { DBUG_PRINT("info", - ("Last chunk: data len %u, offset %u group %u of %u", + ("Last chunk: data len: %u offset: %u group: %u of %u", data->header.chunk0_data_len, data->scanner.page_offset, data->current_group, data->header.groups_no - 1)); DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); @@ -5045,8 +4938,8 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data data->chunk_size= chunk_len - chunk_header_len; data->body_offset= data->scanner.page_offset + chunk_header_len; data->current_offset= new_current_offset; - DBUG_PRINT("info", ("grp: %u chunk %u body_offset %u, chunk_size %u, " - "current_offset %lu", + DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u " + "current_offset: %lu", (uint) data->current_group, (uint) data->current_chunk, (uint) data->body_offset, @@ -5064,8 +4957,8 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data data reader data to initialize RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ static my_bool translog_init_reader_data(LSN lsn, @@ -5073,8 +4966,8 @@ static my_bool translog_init_reader_data(LSN lsn, { DBUG_ENTER("translog_init_reader_data"); if (translog_init_scanner(lsn, 1, &data->scanner) || - (data->read_header= - translog_read_record_header_scan(&data->scanner, &data->header, 1)) == 0) + !(data->read_header= + translog_read_record_header_scan(&data->scanner, &data->header, 1))) { DBUG_RETURN(1); } @@ -5084,8 +4977,8 @@ static my_bool translog_init_reader_data(LSN lsn, data->current_group= 0; data->current_chunk= 0; data->eor= 0; - DBUG_PRINT("info", ("read_header %u, " - "body_offset %u, chunk_size %u, current_offset %lu", + DBUG_PRINT("info", ("read_header: %u " + "body_offset: %u chunk_size: %u current_offset: %lu", (uint) data->read_header, (uint) data->body_offset, (uint) data->chunk_size, (ulong) data->current_offset)); @@ -5099,10 +4992,10 @@ static my_bool translog_init_reader_data(LSN lsn, SYNOPSIS translog_read_record_header() lsn log record serial number (address of the record) - offset from the beginning of the record beginning (read + offset From the beginning of the record beginning (read§ by translog_read_record_header). - length length of record part which have to be read. - buffer buffer where to read the record part (have to be at + length Length of record part which have to be read. + buffer Buffer where to read the record part (have to be at least 'length' bytes length) RETURN @@ -5112,13 +5005,12 @@ static my_bool translog_init_reader_data(LSN lsn, translog_size_t translog_read_record(LSN lsn, translog_size_t offset, translog_size_t length, - uchar *buffer, + byte *buffer, struct st_translog_reader_data *data) { translog_size_t requested_length= length; translog_size_t end= offset + length; struct st_translog_reader_data internal_data; - DBUG_ENTER("translog_read_record"); if (data == NULL) @@ -5133,9 +5025,9 @@ translog_size_t translog_read_record(LSN lsn, if (translog_init_reader_data(lsn, data)) DBUG_RETURN(0); } - DBUG_PRINT("info", ("Offset %lu, length %lu " - "Scanner: Cur: (%lu,0x%lx), Hrz: (%lu,0x%lx), " - "Lst: (%lu,0x%lx), Offset: %u(%x), fixed %d", + DBUG_PRINT("info", ("Offset: %lu length: %lu " + "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", (ulong) offset, (ulong) length, (ulong) LSN_FILE_NO(data->scanner.page_addr), (ulong) LSN_OFFSET(data->scanner.page_addr), @@ -5148,36 +5040,37 @@ translog_size_t translog_read_record(LSN lsn, data->scanner.fixed_horizon)); if (offset < data->read_header) { + uint16 len= min(data->read_header, end) - offset; DBUG_PRINT("info", - ("enter header offset %lu, length %lu", + ("enter header offset: %lu length: %lu", (ulong) offset, (ulong) length)); - uint16 len= (data->read_header < end ? data->read_header : end) - offset; - memmove(buffer, data->header.header + offset, len); + memcpy(buffer, data->header.header + offset, len); length-= len; if (length == 0) DBUG_RETURN(requested_length); offset+= len; buffer+= len; DBUG_PRINT("info", - ("len: %u, offset %lu, curr %lu, length %lu", + ("len: %u offset: %lu curr: %lu length: %lu", len, (ulong) offset, (ulong) data->current_offset, (ulong) length)); } /* TODO: find first page which we should read by offset */ /* read the record chunk by chunk */ - do + for(;;) { uint page_end= data->current_offset + data->chunk_size; DBUG_PRINT("info", - ("enter body offset %lu, curr %lu, length %lu page_end %lu", + ("enter body offset: %lu curr: %lu " + "length: %lu page_end: %lu", (ulong) offset, (ulong) data->current_offset, (ulong) length, (ulong) page_end)); if (offset < page_end) { - DBUG_ASSERT(offset >= data->current_offset); uint len= page_end - offset; - memmove(buffer, + DBUG_ASSERT(offset >= data->current_offset); + memcpy(buffer, data->scanner.page + data->body_offset + (offset - data->current_offset), len); length-= len; @@ -5186,15 +5079,13 @@ translog_size_t translog_read_record(LSN lsn, offset+= len; buffer+= len; DBUG_PRINT("info", - ("len: %u, offset %lu, curr %lu, length %lu", + ("len: %u offset: %lu curr: %lu length: %lu", len, (ulong) offset, (ulong) data->current_offset, (ulong) length)); } if (translog_record_read_next_chunk(data)) DBUG_RETURN(requested_length - length); - } while (length != 0); - - DBUG_RETURN(requested_length); + } } @@ -5208,61 +5099,63 @@ translog_size_t translog_read_record(LSN lsn, static void translog_force_current_buffer_to_finish() { TRANSLOG_ADDRESS new_buff_begunning; - uint8 old_buffer_no= log_descriptor.bc.buffer_no; - uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; - struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; + uint16 old_buffer_no= log_descriptor.bc.buffer_no; + uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= (log_descriptor.buffers + + new_buffer_no); struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer; - uchar *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_size; - uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size; - uint16 current_page_size; - - new_buff_begunning= log_descriptor.bc.buffer->offset; - new_buff_begunning+= log_descriptor.bc.buffer->size; /* increase offset */ - + byte *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_fill; + uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + uint16 current_page_fill, write_counter, previous_offset; DBUG_ENTER("translog_force_current_buffer_to_finish"); - DBUG_PRINT("enter", ("Buffer #%u 0x%lx, " - "Buffer addr (%lu,0x%lx), " - "Page addr: (%lu,0x%lx), " - "New Buff: (%lu,0x%lx), " - "size %lu (%lu), Pg: %u, left: %u", + DBUG_PRINT("enter", ("Buffer #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "New Buff: (%lu,0x%lx) " + "size: %lu (%lu) Pg: %u left: %u", (uint) log_descriptor.bc.buffer_no, (ulong) log_descriptor.bc.buffer, (ulong) LSN_FILE_NO(log_descriptor.bc.buffer->offset), (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) (LSN_OFFSET(log_descriptor.horizon) - - log_descriptor.bc.current_page_size), + log_descriptor.bc.current_page_fill), (ulong) LSN_FILE_NO(new_buff_begunning), (ulong) LSN_OFFSET(new_buff_begunning), (ulong) log_descriptor.bc.buffer->size, (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. buffer->buffer), - (uint) log_descriptor.bc.current_page_size, + (uint) log_descriptor.bc.current_page_fill, (uint) left)); + + new_buff_begunning= log_descriptor.bc.buffer->offset; + new_buff_begunning+= log_descriptor.bc.buffer->size; /* increase offset */ + DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) - %TRANSLOG_PAGE_SIZE == - log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE); + % TRANSLOG_PAGE_SIZE == + log_descriptor.bc.current_page_fill % TRANSLOG_PAGE_SIZE); DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == LSN_FILE_NO(log_descriptor.bc.buffer->offset)); DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == LSN_OFFSET(log_descriptor.horizon)); - if (left != TRANSLOG_PAGE_SIZE && left != 0) + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) { /* TODO: if 'left' is so small that can't hold any other record then do not move the page */ - DBUG_PRINT("info", ("left %u", (uint) left)); + DBUG_PRINT("info", ("left: %u", (uint) left)); /* decrease offset */ - new_buff_begunning-= log_descriptor.bc.current_page_size; - current_page_size= log_descriptor.bc.current_page_size; + new_buff_begunning-= log_descriptor.bc.current_page_fill; + current_page_fill= log_descriptor.bc.current_page_fill; bzero(log_descriptor.bc.ptr, left); log_descriptor.bc.buffer->size+= left; - DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, " + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " "Size: %lu", (uint) log_descriptor.bc.buffer->buffer_no, (ulong) log_descriptor.bc.buffer, @@ -5273,28 +5166,26 @@ static void translog_force_current_buffer_to_finish() else { left= 0; - log_descriptor.bc.current_page_size= 0; + log_descriptor.bc.current_page_fill= 0; } translog_buffer_lock(new_buffer); translog_wait_for_buffer_free(new_buffer); - { - uint16 write_counter= log_descriptor.bc.write_counter; - uint16 previous_offset= log_descriptor.bc.previous_offset; - translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); - log_descriptor.bc.buffer->offset= new_buff_begunning; - log_descriptor.bc.write_counter= write_counter; - log_descriptor.bc.previous_offset= previous_offset; - } + write_counter= log_descriptor.bc.write_counter; + previous_offset= log_descriptor.bc.previous_offset; + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + log_descriptor.bc.buffer->offset= new_buff_begunning; + log_descriptor.bc.write_counter= write_counter; + log_descriptor.bc.previous_offset= previous_offset; - if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) { translog_put_sector_protection(data, &log_descriptor.bc); if (left) { log_descriptor.bc.write_counter++; - log_descriptor.bc.previous_offset= current_page_size; + log_descriptor.bc.previous_offset= current_page_fill; } else { @@ -5304,21 +5195,21 @@ static void translog_force_current_buffer_to_finish() } } - if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) { - uint32 crc= translog_adler_crc(data + log_descriptor.page_overhead, - TRANSLOG_PAGE_SIZE - - log_descriptor.page_overhead); + uint32 crc= translog_crc(data + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); int4store(data + 3 + 3 + 1, crc); } if (left) { - memmove(new_buffer->buffer, data, current_page_size); - log_descriptor.bc.ptr +=current_page_size; - log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_size= - current_page_size; + memcpy(new_buffer->buffer, data, current_page_fill); + log_descriptor.bc.ptr +=current_page_fill; + log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill= + current_page_fill; new_buffer->overlay= old_buffer; } else @@ -5327,6 +5218,7 @@ static void translog_force_current_buffer_to_finish() DBUG_VOID_RETURN; } + /* Flush the log up to given LSN (included) @@ -5336,8 +5228,8 @@ static void translog_force_current_buffer_to_finish() the log have to be flushed RETURN - 0 - OK - 1 - Error + 0 OK + 1 Error */ my_bool translog_flush(LSN lsn) @@ -5346,9 +5238,8 @@ my_bool translog_flush(LSN lsn) int rc= 0; uint i; my_bool full_circle= 0; - DBUG_ENTER("translog_flush"); - DBUG_PRINT("enter", ("Flush up to LSN (%lu,0x%lx)", + DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn))); @@ -5356,16 +5247,15 @@ my_bool translog_flush(LSN lsn) old_flushed= log_descriptor.flushed; for (;;) { - uint8 buffer_no= log_descriptor.bc.buffer_no; - uint8 buffer_start= buffer_no; + uint16 buffer_no= log_descriptor.bc.buffer_no; + uint16 buffer_start= buffer_no; struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer; - struct st_translog_buffer *buffer= log_descriptor.bc.buffer; /* we can't flush in future */ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0); if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0) { - DBUG_PRINT("info", ("already flushed (%lu,0x%lx)", + DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.flushed), (ulong) LSN_OFFSET(log_descriptor.flushed))); translog_unlock(); @@ -5383,7 +5273,7 @@ my_bool translog_flush(LSN lsn) translog_buffer_lock(buffer); translog_buffer_unlock(buffer_unlock); buffer_unlock= buffer; - if (buffer->file) + if (buffer->file != -1) { buffer_unlock= NULL; if (buffer_start == buffer_no) @@ -5398,12 +5288,10 @@ my_bool translog_flush(LSN lsn) cmp_translog_addr(log_descriptor.flushed, lsn) < 0); if (buffer_unlock != NULL) translog_buffer_unlock(buffer_unlock); - if (translog_buffer_flush(buffer)) - { - translog_buffer_unlock(buffer); - DBUG_RETURN(1); - } + rc= translog_buffer_flush(buffer); translog_buffer_unlock(buffer); + if (rc) + DBUG_RETURN(1); if (!full_circle) translog_lock(); } @@ -5417,10 +5305,10 @@ my_bool translog_flush(LSN lsn) OPENED_FILES_NUM) { /* file in the cache */ - if (log_descriptor.log_file_num[cache_index] == 0) + if (log_descriptor.log_file_num[cache_index] == -1) { if ((log_descriptor.log_file_num[cache_index]= - open_logfile_by_number_no_cache(i)) == 0) + open_logfile_by_number_no_cache(i)) == -1) { translog_unlock(); DBUG_RETURN(1); @@ -5429,13 +5317,7 @@ my_bool translog_flush(LSN lsn) file= log_descriptor.log_file_num[cache_index]; rc|= my_sync(file, MYF(MY_WME)); } - else - { - /* very unlike situation with extremely small file size */ - File file= open_logfile_by_number_no_cache(i); - rc|= my_sync(file, MYF(MY_WME)); - my_close(file, MYF(MY_WME)); - } + /* We sync file when we are closing it => do nothing if file closed */ } log_descriptor.flushed= sent_to_file; rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME)); diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 02c272361a4..3646afcf52a 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -1,13 +1,16 @@ -#ifndef _ma_loghandler_h -#define _ma_loghandler_h - /* Transaction log flags */ #define TRANSLOG_PAGE_CRC 1 #define TRANSLOG_SECTOR_PROTECTION (1<<1) #define TRANSLOG_RECORD_CRC (1<<2) +#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \ + TRANSLOG_RECORD_CRC) + 1) -/* page size in transaction log */ +/* + Page size in transaction log + It should be Power of 2 and multiple of DISK_DRIVE_SECTOR_SIZE + (DISK_DRIVE_SECTOR_SIZE * 2^N) +*/ #define TRANSLOG_PAGE_SIZE (8*1024) #include "ma_loghandler_lsn.h" @@ -15,6 +18,14 @@ /* short transaction ID type */ typedef uint16 SHORT_TRANSACTION_ID; +/* Length of CRC at end of pages */ +#define CRC_LENGTH 4 +/* + Length of disk drive sector size (we assume that writing it + to disk is atomic operation) +*/ +#define DISK_DRIVE_SECTOR_SIZE 512 + /* types of records in the transaction log */ enum translog_record_type { @@ -51,8 +62,9 @@ enum translog_record_type LOGREC_LONG_TRANSACTION_ID= 30, LOGREC_RESERVED_FUTURE_EXTENSION= 63 }; -#define LOGREC_NUMBER_OF_TYPES 64 +#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */ +/* Size of log file; One log file is restricted to 4G */ typedef uint32 translog_size_t; #define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024 @@ -68,8 +80,8 @@ typedef struct st_translog_header_buffer { /* LSN of the read record */ LSN lsn; - /* type of the read record */ - enum translog_record_type type; + /* array of groups descriptors, can be used only if groups_no > 0 */ + TRANSLOG_GROUP *groups; /* short transaction ID or 0 if it has no sense for the record */ SHORT_TRANSACTION_ID short_trid; /* @@ -77,56 +89,56 @@ typedef struct st_translog_header_buffer hidden part of record (type, short TrID, length) */ translog_size_t record_length; - /* - Real compressed LSN(s) size economy (*7 - ) - */ - uint16 compressed_LSN_economy; /* Buffer for write decoded header of the record (depend on the record type) */ - uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE]; - /* non read body data offset on the page */ + byte header[TRANSLOG_RECORD_HEADER_MAX_SIZE]; + /* number of groups listed in */ + uint groups_no; + /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */ + uint chunk0_pages; + /* type of the read record */ + enum translog_record_type type; + /* chunk 0 data address (valid only if groups_no > 0) */ + TRANSLOG_ADDRESS chunk0_data_addr; + /* + Real compressed LSN(s) size economy (*7 - ) + */ + uint16 compressed_LSN_economy; + /* short transaction ID or 0 if it has no sense for the record */ uint16 non_header_data_start_offset; /* non read body data length in this first chunk */ uint16 non_header_data_len; - /* number of groups listed in */ - uint groups_no; - /* array of groups descriptors, can be used only if groups_no > 0 */ - TRANSLOG_GROUP *groups; - /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */ - uint chunk0_pages; - /* chunk 0 data address (valid only if groups_no > 0) */ - TRANSLOG_ADDRESS chunk0_data_addr; /* chunk 0 data size (valid only if groups_no > 0) */ uint16 chunk0_data_len; } TRANSLOG_HEADER_BUFFER; -struct st_translog_scanner_data +typedef struct st_translog_scanner_data { - uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */ - TRANSLOG_ADDRESS page_addr; /* current page address */ - TRANSLOG_ADDRESS horizon; /* end of the log which we saw - last time */ - TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ - uchar *page; /* page content pointer */ - translog_size_t page_offset; /* offset of the chunk in the - page */ - my_bool fixed_horizon; /* set horizon only once at - init */ -}; + byte buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */ + TRANSLOG_ADDRESS page_addr; /* current page address */ + /* end of the log which we saw last time */ + TRANSLOG_ADDRESS horizon; + TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ + byte *page; /* page content pointer */ + /* offset of the chunk in the page */ + translog_size_t page_offset; + /* set horizon only once at init */ + my_bool fixed_horizon; +} TRANSLOG_SCANNER_DATA; struct st_translog_reader_data { TRANSLOG_HEADER_BUFFER header; /* Header */ - struct st_translog_scanner_data scanner; /* chunks scanner */ + TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */ translog_size_t body_offset; /* current chunk body offset */ - translog_size_t current_offset; /* data offset from the record - beginning */ - uint16 read_header; /* number of bytes read in - header */ + /* data offset from the record beginning */ + translog_size_t current_offset; + /* number of bytes read in header */ + uint16 read_header; uint16 chunk_size; /* current chunk size */ uint current_group; /* current group */ uint current_chunk; /* current chunk in the group */ @@ -134,181 +146,37 @@ struct st_translog_reader_data }; -/* - Initialize transaction log - - SYNOPSIS - translog_init() - directory Directory where log files are put - log_file_max_size max size of one log size (for new logs creation) - server_version version of MySQL servger (MYSQL_VERSION_ID) - server_id server ID (replication & Co) - pagecache Page cache for the log reads - flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION - TRANSLOG_RECORD_CRC) - - RETURN - 0 - OK - 1 - Error -*/ - my_bool translog_init(const char *directory, uint32 log_file_max_size, - uint32 server_version, - uint32 server_id, PAGECACHE *pagecache, uint flags); - - -/* - Write the log record - - SYNOPSIS - translog_write_record() - lsn LSN of the record will be writen here - type the log record type - short_trid Sort transaction ID or 0 if it has no sense - tcb Transaction control block pointer for hooks by - record log type - partN_length length of Ns part of the log - partN_buffer pointer on Ns part buffer - 0 sign of the end of parts - - RETURN - 0 - OK - 1 - Error -*/ + uint32 server_version, uint32 server_id, + PAGECACHE *pagecache, uint flags); my_bool translog_write_record(LSN *lsn, enum translog_record_type type, SHORT_TRANSACTION_ID short_trid, void *tcb, translog_size_t part1_length, - uchar *part1_buff, ...); - - -/* - Free log handler resources - - SYNOPSIS - translog_destroy() -*/ + byte *part1_buff, ...); void translog_destroy(); - -/* - Read record header and some fixed part of a record (the part depend on - record type). - - SYNOPSIS - translog_read_record_header() - lsn log record serial number (address of the record) - buff log record header buffer - - NOTE - - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed - correctly. - - Some type of record can be read completely by this call - - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative - LSN can be translated to absolute one), some fields can be added - (like actual header length in the record if the header has variable - length) - - RETURN - 0 - error - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header -*/ - translog_size_t translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff); - -/* - Free resources used by TRANSLOG_HEADER_BUFFER - - SYNOPSIS - translog_free_record_header(); -*/ - void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); - -/* - Read a part of the record. - - SYNOPSIS - translog_read_record_header() - lsn log record serial number (address of the record) - offset from the beginning of the record beginning (read - by translog_read_record_header). - length length of record part which have to be read. - buffer buffer where to read the record part (have to be at - least 'length' bytes length) - - RETURN - 0 - error (or read out of the record) - length of data actually read -*/ - translog_size_t translog_read_record(LSN lsn, translog_size_t offset, translog_size_t length, - uchar *buffer, + byte *buffer, struct st_translog_reader_data *data); - -/* - Flush the log up to given LSN (included) - - SYNOPSIS - translog_flush() - lsn log record serial number up to which (inclusive) - the log have to be flushed - - RETURN - 0 - OK - 1 - Error -*/ - my_bool translog_flush(LSN lsn); +my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner); -/* - Read record header and some fixed part of the next record (the part - depend on record type). +translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff); - SYNOPSIS - translog_read_next_record_header() - lsn log record serial number (address of the record) - previous to the record which will be read - If LSN present scanner will be initialized from it, - do not use LSN after initialization for fast scanning. - buff log record header buffer - fixed_horizon true if it is OK do not read records which was written - after scaning begining - scanner data for scaning if lsn is NULL scanner data - will be used for continue scaning. - scanner can be NULL. - - NOTE - - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed - correctly (lsn in buffer will be replaced by next record, but initial - lsn will be read correctly). - - it is like translog_read_record_header, but read next record, so see - its NOTES. - - in case of end of the log buff->lsn will be set to - (CONTROL_FILE_IMPOSSIBLE_LOGNO, 0) - RETURN - 0 - error - TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log - number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded - part of the header -*/ - -translog_size_t translog_read_next_record_header(LSN lsn, - TRANSLOG_HEADER_BUFFER *buff, - my_bool fixed_horizon, - struct - st_translog_scanner_data - *scanner); - -#endif diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 472298de07c..9625f109864 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -30,15 +30,18 @@ typedef TRANSLOG_ADDRESS LSN; /* checks LSN */ #define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) +/* size of stored LSN on a disk */ +#define LSN_STORE_SIZE 7 + /* Puts LSN into buffer (dst) */ -#define lsn7store(dst, lsn) \ +#define lsn_store(dst, lsn) \ do { \ int3store((dst), LSN_FILE_NO(lsn)); \ int4store((dst) + 3, LSN_OFFSET(lsn)); \ } while (0) /* Unpacks LSN from the buffer (P) */ -#define lsn7korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3)) +#define lsn_korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3)) /* what we need to add to LSN to increase it on one file */ #define LSN_ONE_FILE ((int64)0x100000000LL) diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index afbef150b62..54ed4b0d746 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -47,10 +47,10 @@ static const char *default_dbug_option; 1 - Error */ -static my_bool check_content(uchar *ptr, ulong length) +static my_bool check_content(byte *ptr, ulong length) { ulong i; - uchar buff[2]; + byte buff[2]; for (i= 0; i < length; i++) { if (i % 2 == 0) @@ -81,7 +81,7 @@ static my_bool check_content(uchar *ptr, ulong length) */ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, - uchar *buffer, uint skip) + byte *buffer, uint skip) { DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2); if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) != @@ -95,16 +95,16 @@ int main(int argc, char *argv[]) uint32 i; uint32 rec_len; uint pagen; - uchar long_tr_id[6]; - uchar lsn_buff[23]= + byte long_tr_id[6]; + byte lsn_buff[23]= { 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 }; - uchar long_buffer[LONG_BUFFER_SIZE * 2 + 7 * 2 + 2]; + byte long_buffer[LONG_BUFFER_SIZE * 2 + LSN_STORE_SIZE * 2 + 2]; PAGECACHE pagecache; - LSN lsn, lsn_base, first_lsn, lsn_ptr; + LSN lsn, lsn_base, first_lsn; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; int rc; @@ -114,7 +114,7 @@ int main(int argc, char *argv[]) bzero(&pagecache, sizeof(pagecache)); maria_data_root= "."; - for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i+= 2) + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2) { int2store(long_buffer + i, (i >> 1)); /* long_buffer[i]= (i & 0xFF); */ @@ -173,7 +173,7 @@ int main(int argc, char *argv[]) printf("write %d\n", i); if (i % 2) { - lsn7store(lsn_buff, lsn_base); + lsn_store(lsn_buff, lsn_base); if (translog_write_record(&lsn, LOGREC_CLR_END, (i % 0xFFFF), NULL, 7, lsn_buff, 0)) @@ -183,7 +183,7 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, lsn_base); + lsn_store(lsn_buff, lsn_base); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) rec_len= 12; if (translog_write_record(&lsn, @@ -199,8 +199,8 @@ int main(int argc, char *argv[]) } else { - lsn7store(lsn_buff, lsn_base); - lsn7store(lsn_buff + 7, first_lsn); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, (i % 0xFFFF), NULL, 23, lsn_buff, 0)) @@ -210,8 +210,8 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, lsn_base); - lsn7store(lsn_buff + 7, first_lsn); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) rec_len= 19; if (translog_write_record(&lsn, @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) } if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || rec.record_length != 6 || uint4korr(rec.header) != 0 || - (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || first_lsn != rec.lsn) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" @@ -304,12 +304,16 @@ int main(int argc, char *argv[]) goto err; } lsn= first_lsn; - lsn_ptr= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } for (i= 1;; i++) { if (i % 1000 == 0) printf("read %d\n", i); - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", @@ -326,21 +330,21 @@ int main(int argc, char *argv[]) } break; } - /* use scanner after its initialization */ - lsn_ptr= 0; if (i % 2) { LSN ref; - ref= lsn7korr(rec.header); + ref= lsn_korr(rec.header); if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || rec.record_length != 7 || ref != lsn) { fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d) " - "type %u, strid %u, len %u, ref(%lu,0x%lx), " + "type: %u strid: %u len: %u" + "ref: (%lu,0x%lx) (%lu,0x%lx) " "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn), (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } @@ -348,18 +352,22 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - ref1= lsn7korr(rec.header); - ref2= lsn7korr(rec.header + 7); - if (rec.type !=LOGREC_UNDO_ROW_DELETE || + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if (rec.type != LOGREC_UNDO_ROW_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != 23 || ref1 != lsn || ref2 != first_lsn || - rec.header[22] != 0x55 || rec.header[21] != 0xAA || - rec.header[20] != 0x55 || rec.header[19] != 0xAA || - rec.header[18] != 0x55 || rec.header[17] != 0xAA || - rec.header[16] != 0x55 || rec.header[15] != 0xAA || - rec.header[14] != 0x55) + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) { fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" "type %u, strid %u, len %u, ref1(%lu,0x%lx), " @@ -378,7 +386,7 @@ int main(int argc, char *argv[]) goto err; } } - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header (var) " @@ -394,14 +402,14 @@ int main(int argc, char *argv[]) if (i % 2) { LSN ref; - ref= lsn7korr(rec.header); + ref= lsn_korr(rec.header); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) rec_len= 12; if (rec.type !=LOGREC_UNDO_KEY_INSERT || rec.short_trid != (i % 0xFFFF) || - rec.record_length != rec_len + 7 || + rec.record_length != rec_len + LSN_STORE_SIZE || len != 12 || ref != lsn || - check_content(rec.header + 7, len - 7)) + check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " @@ -412,16 +420,17 @@ int main(int argc, char *argv[]) (uint) rec.short_trid, rec.short_trid != (i % 0xFFFF), (ulong) rec.record_length, (ulong) rec_len, - rec.record_length != rec_len + 7, + rec.record_length != rec_len + LSN_STORE_SIZE, (uint) len, len != 12, (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), (len != 12 || ref != lsn), - check_content(rec.header + 7, len - 7)); + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); goto err; } - if (read_and_check_content(&rec, long_buffer, 7)) + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " @@ -433,20 +442,21 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - ref1= lsn7korr(rec.header); - ref2= lsn7korr(rec.header + 7); + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) rec_len= 19; if (rec.type !=LOGREC_UNDO_KEY_DELETE || rec.short_trid != (i % 0xFFFF) || - rec.record_length != rec_len + 14 || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || len != 19 || ref1 != lsn || ref2 != first_lsn || - check_content(rec.header + 14, len - 14)) + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" - "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, @@ -457,7 +467,7 @@ int main(int argc, char *argv[]) (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } - if (read_and_check_content(&rec, long_buffer, 14)) + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " @@ -467,7 +477,7 @@ int main(int argc, char *argv[]) } } - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", @@ -483,7 +493,7 @@ int main(int argc, char *argv[]) if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != (i % 0xFFFF) || rec.record_length != 6 || uint4korr(rec.header) != i || - rec.header[4] != 0 || rec.header[5] != 0xFF) + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " @@ -498,7 +508,7 @@ int main(int argc, char *argv[]) lsn= rec.lsn; - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) rec_len= 9; if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 1d5ad9c81d8..3a2525a089e 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -42,10 +42,10 @@ static const char *default_dbug_option; 1 - Error */ -static my_bool check_content(uchar *ptr, ulong length) +static my_bool check_content(byte *ptr, ulong length) { ulong i; - uchar buff[4]; + byte buff[4]; DBUG_ENTER("check_content"); for (i= 0; i < length; i++) { @@ -79,12 +79,12 @@ static my_bool check_content(uchar *ptr, ulong length) */ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, - uchar *buffer, uint skip) + byte *buffer, uint skip) { int res= 0; translog_size_t len; DBUG_ENTER("read_and_check_content"); - DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); if ((len= translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL)) != rec->record_length) { @@ -113,16 +113,16 @@ int main(int argc, char *argv[]) uint32 i; uint32 rec_len; uint pagen; - uchar long_tr_id[6]; - uchar lsn_buff[23]= + byte long_tr_id[6]; + byte lsn_buff[23]= { 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 }; - uchar *long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); + byte *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); PAGECACHE pagecache; - LSN lsn, lsn_base, first_lsn, lsn_ptr; + LSN lsn, lsn_base, first_lsn; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; int rc; @@ -133,8 +133,8 @@ int main(int argc, char *argv[]) maria_data_root= "."; { - uchar buff[4]; - for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) + byte buff[4]; + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++) { if (i % 4 == 0) int4store(buff, (i >> 2)); @@ -195,22 +195,24 @@ int main(int argc, char *argv[]) printf("write %d\n", i); if (i % 2) { - lsn7store(lsn_buff, lsn_base); + lsn_store(lsn_buff, lsn_base); if (translog_write_record(&lsn, LOGREC_CLR_END, - (i % 0xFFFF), NULL, 7, lsn_buff, 0)) + (i % 0xFFFF), NULL, + LSN_STORE_SIZE, lsn_buff, 0)) { fprintf(stderr, "1 Can't write reference before record #%lu\n", (ulong) i); translog_destroy(); exit(1); } - lsn7store(lsn_buff, lsn_base); + lsn_store(lsn_buff, lsn_base); rec_len= get_len(); if (translog_write_record(&lsn, LOGREC_UNDO_KEY_INSERT, (i % 0xFFFF), - NULL, 7, lsn_buff, rec_len, long_buffer, 0)) + NULL, LSN_STORE_SIZE, lsn_buff, + rec_len, long_buffer, 0)) { fprintf(stderr, "1 Can't write var reference before record #%lu\n", (ulong) i); @@ -220,8 +222,8 @@ int main(int argc, char *argv[]) } else { - lsn7store(lsn_buff, lsn_base); - lsn7store(lsn_buff + 7, first_lsn); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, (i % 0xFFFF), NULL, 23, lsn_buff, 0)) @@ -231,13 +233,14 @@ int main(int argc, char *argv[]) translog_destroy(); exit(1); } - lsn7store(lsn_buff, lsn_base); - lsn7store(lsn_buff + 7, first_lsn); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); rec_len= get_len(); if (translog_write_record(&lsn, LOGREC_UNDO_KEY_DELETE, (i % 0xFFFF), - NULL, 14, lsn_buff, rec_len, long_buffer, 0)) + NULL, LSN_STORE_SIZE * 2, lsn_buff, + rec_len, long_buffer, 0)) { fprintf(stderr, "0 Can't write var reference before record #%lu\n", (ulong) i); @@ -304,7 +307,7 @@ int main(int argc, char *argv[]) } if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || rec.record_length != 6 || uint4korr(rec.header) != 0 || - (uint)rec.header[4] != 0 || rec.header[5] != 0xFF || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || first_lsn != rec.lsn) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" @@ -319,12 +322,16 @@ int main(int argc, char *argv[]) } translog_free_record_header(&rec); lsn= first_lsn; - lsn_ptr= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } for (i= 1;; i++) { if (i % SHOW_DIVIDER == 0) printf("read %d\n", i); - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", @@ -343,15 +350,13 @@ int main(int argc, char *argv[]) } break; } - /* use scanner after its initialization */ - lsn_ptr= 0; if (i % 2) { LSN ref; - ref= lsn7korr(rec.header); + ref= lsn_korr(rec.header); if (rec.type != LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || - rec.record_length != 7 || ref != lsn) + rec.record_length != LSN_STORE_SIZE || ref != lsn) { fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n", @@ -366,18 +371,22 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - ref1= lsn7korr(rec.header); - ref2= lsn7korr(rec.header + 7); + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); if (rec.type !=LOGREC_UNDO_ROW_DELETE || rec.short_trid != (i % 0xFFFF) || rec.record_length != 23 || ref1 != lsn || ref2 != first_lsn || - rec.header[22] != 0x55 || rec.header[21] != 0xAA || - rec.header[20] != 0x55 || rec.header[19] != 0xAA || - rec.header[18] != 0x55 || rec.header[17] != 0xAA || - rec.header[16] != 0x55 || rec.header[15] != 0xAA || - rec.header[14] != 0x55) + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) { fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" "type %u, strid %u, len %u, ref1(%lu,0x%lx), " @@ -399,7 +408,7 @@ int main(int argc, char *argv[]) } translog_free_record_header(&rec); - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header (var) " @@ -415,13 +424,13 @@ int main(int argc, char *argv[]) if (i % 2) { LSN ref; - ref= lsn7korr(rec.header); + ref= lsn_korr(rec.header); rec_len= get_len(); if (rec.type !=LOGREC_UNDO_KEY_INSERT || rec.short_trid != (i % 0xFFFF) || - rec.record_length != rec_len + 7 || + rec.record_length != rec_len + LSN_STORE_SIZE || len != 12 || ref != lsn || - check_content(rec.header + 7, len - 7)) + check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " @@ -432,17 +441,18 @@ int main(int argc, char *argv[]) (uint) rec.short_trid, rec.short_trid != (i % 0xFFFF), (ulong) rec.record_length, (ulong) rec_len, - rec.record_length != rec_len + 7, + rec.record_length != rec_len + LSN_STORE_SIZE, (uint) len, len != 12, (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), (ref != lsn), - check_content(rec.header + 7, len - 7)); + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); translog_free_record_header(&rec); goto err; } - if (read_and_check_content(&rec, long_buffer, 7)) + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " @@ -455,19 +465,20 @@ int main(int argc, char *argv[]) else { LSN ref1, ref2; - ref1= lsn7korr(rec.header); - ref2= lsn7korr(rec.header + 7); + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); rec_len= get_len(); if (rec.type !=LOGREC_UNDO_KEY_DELETE || rec.short_trid != (i % 0xFFFF) || - rec.record_length != rec_len + 14 || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || len != 19 || ref1 != lsn || ref2 != first_lsn || - check_content(rec.header + 14, len - 14)) + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" - "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, " + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " "lsn(%lu,0x%lx)\n", i, (uint) rec.type, (uint) rec.short_trid, @@ -479,7 +490,7 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } - if (read_and_check_content(&rec, long_buffer, 14)) + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) { fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " @@ -491,7 +502,7 @@ int main(int argc, char *argv[]) } translog_free_record_header(&rec); - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", @@ -509,7 +520,7 @@ int main(int argc, char *argv[]) if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != (i % 0xFFFF) || rec.record_length != 6 || uint4korr(rec.header) != i || - rec.header[4] != 0 || rec.header[5] != 0xFF) + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) { fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " @@ -526,7 +537,7 @@ int main(int argc, char *argv[]) lsn= rec.lsn; - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); + len= translog_read_next_record_header(&scanner, &rec); rec_len= get_len(); if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || rec.short_trid != (i % 0xFFFF) || diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index ed5479026ef..eb64d15868c 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -27,7 +27,7 @@ static uint thread_count; static ulong lens[WRITERS][ITERATIONS]; static LSN lsns1[WRITERS][ITERATIONS]; static LSN lsns2[WRITERS][ITERATIONS]; -static uchar *long_buffer; +static byte *long_buffer; /* Get pseudo-random length of the field in @@ -65,12 +65,12 @@ static uint32 get_len() 1 - Error */ -static my_bool check_content(uchar *ptr, ulong length) +static my_bool check_content(byte *ptr, ulong length) { ulong i; for (i= 0; i < length; i++) { - if (ptr[i] != (i & 0xFF)) + if (((uchar)ptr[i]) != (i & 0xFF)) { fprintf(stderr, "Byte # %lu is %x instead of %x", i, (uint) ptr[i], (uint) (i & 0xFF)); @@ -97,7 +97,7 @@ static my_bool check_content(uchar *ptr, ulong length) static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, - uchar *buffer, uint skip) + byte *buffer, uint skip) { int res= 0; translog_size_t len; @@ -117,7 +117,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, void writer(int num) { LSN lsn; - uchar long_tr_id[6]; + byte long_tr_id[6]; uint i; DBUG_ENTER("writer"); @@ -193,7 +193,7 @@ int main(int argc, char **argv __attribute__ ((unused))) uint32 i; uint pagen; PAGECACHE pagecache; - LSN first_lsn, lsn_ptr; + LSN first_lsn; TRANSLOG_HEADER_BUFFER rec; struct st_translog_scanner_data scanner; pthread_t tid; @@ -290,7 +290,7 @@ int main(int argc, char **argv __attribute__ ((unused))) srandom(122334817L); { - uchar long_tr_id[6]= + byte long_tr_id[6]= { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }; @@ -369,11 +369,14 @@ int main(int argc, char **argv __attribute__ ((unused))) bzero(indeces, sizeof(indeces)); - lsn_ptr= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } for (i= 0;; i++) { - len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner); - lsn_ptr= 0; + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index f215805d829..fc9b34ff980 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) bzero(page, PCACHE_PAGE); #define PAGE_LSN_OFFSET 0 - lsn7store(page + PAGE_LSN_OFFSET, lsn); + lsn_store(page + PAGE_LSN_OFFSET, lsn); pagecache_write(&pagecache, &file1, 0, 3, (char*)page, PAGECACHE_LSN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, diff --git a/support-files/magic b/support-files/magic index c8ca5875211..e2bd4d70fb5 100644 --- a/support-files/magic +++ b/support-files/magic @@ -17,3 +17,10 @@ 0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file >3 byte x Version %d 0 string \376bin MySQL replication log +0 belong&0xffffff00 0xfefe0b00 +>4 string MARIALOG MySQL Maria transaction log file +>>3 byte x Version %d +0 belong&0xffffff00 0xfefe0c00 +>4 string MACF MySQL Maria control file +>>3 byte x Version %d + From 3b0e794aca0239a7b5a63c590b52efe2b4e0141d Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Feb 2007 15:54:08 +0200 Subject: [PATCH 90/95] unittest fixed storage/maria/unittest/Makefile.am: Unittest cleunup added storage/maria/unittest/ma_control_file-t.c: Unittest fixed according new file format storage/maria/unittest/ma_test_loghandler-t.c: Unittest cleunup added Unittest progress report added storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Unittest cleunup added Unittest progress report added storage/maria/unittest/ma_test_loghandler_multithread-t.c: Unittest cleunup added Unittest progress report added storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Unittest cleunup added Unittest progress report added storage/maria/unittest/ma_maria_log_cleanup.c: New BitKeeper file ``storage/maria/unittest/ma_maria_log_cleanup.c'' --- storage/maria/unittest/Makefile.am | 5 ++ storage/maria/unittest/ma_control_file-t.c | 15 +++-- storage/maria/unittest/ma_maria_log_cleanup.c | 45 +++++++++++++ storage/maria/unittest/ma_test_loghandler-t.c | 46 +++++++++++-- .../ma_test_loghandler_multigroup-t.c | 38 +++++++++-- .../ma_test_loghandler_multithread-t.c | 64 ++++++++----------- .../unittest/ma_test_loghandler_pagecache-t.c | 8 +++ 7 files changed, 163 insertions(+), 58 deletions(-) create mode 100644 storage/maria/unittest/ma_maria_log_cleanup.c diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index 74d9059ca7d..33f1bfed560 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -44,6 +44,11 @@ noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \ ma_test_loghandler_multithread-t \ ma_test_loghandler_pagecache-t +ma_test_loghandler_t_SOURCES= ma_test_loghandler-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multigroup_t_SOURCES= ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multithread_t_SOURCES= ma_test_loghandler_multithread-t.c ma_maria_log_cleanup.c +ma_test_loghandler_pagecache_t_SOURCES= ma_test_loghandler_pagecache-t.c ma_maria_log_cleanup.c + mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 9f6a6c9cf56..1b37ee3f53c 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -83,6 +83,7 @@ static void get_options(int argc, char *argv[]); int main(int argc,char *argv[]) { MY_INIT(argv[0]); + maria_data_root= "."; plan(9); @@ -263,18 +264,18 @@ static int test_binary_content() future change/breakage. */ - char buffer[17]; + char buffer[20]; RET_ERR_UNLESS((fd= my_open(file_name, O_BINARY | O_RDWR, MYF(MY_WME))) >= 0); - RET_ERR_UNLESS(my_read(fd, buffer, 17, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_read(fd, buffer, 20, MYF(MY_FNABP | MY_WME)) == 0); RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); - i= uint4korr(buffer+5); + i= uint3korr(buffer+9); RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn)); - i= uint4korr(buffer+9); + i= uint4korr(buffer+12); RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn)); - i= uint4korr(buffer+13); + i= uint4korr(buffer+16); RET_ERR_UNLESS(i == last_logno); RET_ERR_UNLESS(close_file() == 0); return 0; @@ -341,9 +342,9 @@ static int test_bad_checksum() RET_ERR_UNLESS((fd= my_open(file_name, O_BINARY | O_RDWR, MYF(MY_WME))) >= 0); - RET_ERR_UNLESS(my_pread(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); buffer[0]+= 3; /* mangle checksum */ - RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_BAD_CHECKSUM); diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c new file mode 100644 index 00000000000..c5917764b9b --- /dev/null +++ b/storage/maria/unittest/ma_maria_log_cleanup.c @@ -0,0 +1,45 @@ +#include "../maria_def.h" +#include + +my_bool maria_log_remove() +{ + MY_DIR *dirp; + uint i; + MY_STAT stat_buff; + char file_name[FN_REFLEN]; + + /* Removes control file */ + if (fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) == NullS) + return 1; + if (my_stat(file_name, &stat_buff, MYF(0)) && + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + + /* Finds and removes transaction log files */ + if (!(dirp = my_dir(maria_data_root, MYF(MY_DONT_SORT)))) + return 1; + + for (i= 0; i < dirp->number_off_files; i++) + { + char *file= dirp->dir_entry[i].name; + if (strncmp(file, "maria_log.", 10) == 0 && + file[10] >= '0' && file[10] <= '9' && + file[11] >= '0' && file[11] <= '9' && + file[12] >= '0' && file[12] <= '9' && + file[13] >= '0' && file[13] <= '9' && + file[14] >= '0' && file[14] <= '9' && + file[15] >= '0' && file[15] <= '9' && + file[16] >= '0' && file[16] <= '9' && + file[17] >= '0' && file[17] <= '9' && + file[18] == '\0') + { + if (fn_format(file_name, file, + maria_data_root, "", MYF(MY_WME)) == NullS || + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + } + } + return 0; +} + diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index 54ed4b0d746..757520322c8 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -3,6 +3,8 @@ #include #include +extern my_bool maria_log_remove(); + #ifndef DBUG_OFF static const char *default_dbug_option; #endif @@ -113,6 +115,8 @@ int main(int argc, char *argv[]) bzero(&pagecache, sizeof(pagecache)); maria_data_root= "."; + if (maria_log_remove()) + exit(1); for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2) { @@ -152,6 +156,8 @@ int main(int argc, char *argv[]) exit(1); } + plan(((ITERATIONS - 1) * 4 + 1)*2 + ITERATIONS - 1); + srandom(122334817L); long_tr_id[5]= 0xff; @@ -163,14 +169,14 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); exit(1); } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); lsn_base= first_lsn= lsn; for (i= 1; i < ITERATIONS; i++) { - if (i % 1000 == 0) - printf("write %d\n", i); if (i % 2) { lsn_store(lsn_buff, lsn_base); @@ -181,8 +187,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "1 Can't write reference defore record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_CLR_END"); exit(1); } + ok(1, "write LOGREC_CLR_END"); lsn_store(lsn_buff, lsn_base); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) rec_len= 12; @@ -194,8 +202,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "1 Can't write var reference defore record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_INSERT"); exit(1); } + ok(1, "write LOGREC_UNDO_KEY_INSERT"); } else { @@ -208,8 +218,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "0 Can't write reference defore record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_ROW_DELETE"); exit(1); } + ok(1, "write LOGREC_UNDO_ROW_DELETE"); lsn_store(lsn_buff, lsn_base); lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) @@ -222,8 +234,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "0 Can't write var reference defore record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_DELETE"); exit(1); } + ok(1, "write LOGREC_UNDO_KEY_DELETE"); } int4store(long_tr_id, i); if (translog_write_record(&lsn, @@ -232,8 +246,10 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); exit(1); } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); lsn_base= lsn; @@ -245,14 +261,18 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD"); exit(1); } + ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD"); if (translog_flush(lsn)) { fprintf(stderr, "Can't flush #%lu\n", (ulong) i); translog_destroy(); + ok(0, "flush"); exit(1); } + ok(1, "flush"); } translog_destroy(); @@ -303,6 +323,8 @@ int main(int argc, char *argv[]) (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } + ok(1, "read record"); + translog_free_record_header(&rec); lsn= first_lsn; if (translog_init_scanner(first_lsn, 1, &scanner)) { @@ -311,8 +333,6 @@ int main(int argc, char *argv[]) } for (i= 1;; i++) { - if (i % 1000 == 0) - printf("read %d\n", i); len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { @@ -386,6 +406,9 @@ int main(int argc, char *argv[]) goto err; } } + ok(1, "read record"); + translog_free_record_header(&rec); + len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { @@ -476,6 +499,8 @@ int main(int argc, char *argv[]) goto err; } } + ok(1, "read record"); + translog_free_record_header(&rec); len= translog_read_next_record_header(&scanner, &rec); if (len == 0) @@ -505,8 +530,9 @@ int main(int argc, char *argv[]) (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } - lsn= rec.lsn; + ok(1, "read record"); + translog_free_record_header(&rec); len= translog_read_next_record_header(&scanner, &rec); if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) @@ -533,14 +559,20 @@ int main(int argc, char *argv[]) (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); goto err; } + ok(1, "read record"); + translog_free_record_header(&rec); } } - rc= 1; + rc= 0; err: + if (rc) + ok(0, "read record"); translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); - return(test(exit_status() || rc)); + if (maria_log_remove()) + exit(1); + return(test(exit_status())); } diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 3a2525a089e..4aaf30bd9a3 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -3,6 +3,8 @@ #include #include +extern my_bool maria_log_remove(); + #ifndef DBUG_OFF static const char *default_dbug_option; #endif @@ -13,8 +15,6 @@ static const char *default_dbug_option; #define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1) -#define SHOW_DIVIDER 2 - #define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) #define ITERATIONS 2 /*#define ITERATIONS 63 */ @@ -131,6 +131,8 @@ int main(int argc, char *argv[]) bzero(&pagecache, sizeof(pagecache)); maria_data_root= "."; + if (maria_log_remove()) + exit(1); { byte buff[4]; @@ -174,6 +176,8 @@ int main(int argc, char *argv[]) exit(1); } + plan(((ITERATIONS - 1) * 4 + 1) * 2); + srandom(122334817L); long_tr_id[5]= 0xff; @@ -185,14 +189,14 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); exit(1); } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); lsn_base= first_lsn= lsn; for (i= 1; i < ITERATIONS; i++) { - if (i % SHOW_DIVIDER == 0) - printf("write %d\n", i); if (i % 2) { lsn_store(lsn_buff, lsn_base); @@ -204,8 +208,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "1 Can't write reference before record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_CLR_END"); exit(1); } + ok(1, "write LOGREC_CLR_END"); lsn_store(lsn_buff, lsn_base); rec_len= get_len(); if (translog_write_record(&lsn, @@ -217,8 +223,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "1 Can't write var reference before record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_INSERT"); exit(1); } + ok(1, "write LOGREC_UNDO_KEY_INSERT"); } else { @@ -231,8 +239,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "0 Can't write reference before record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_ROW_DELETE"); exit(1); } + ok(1, "write LOGREC_UNDO_ROW_DELETE"); lsn_store(lsn_buff, lsn_base); lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); rec_len= get_len(); @@ -245,8 +255,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "0 Can't write var reference before record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_DELETE"); exit(1); } + ok(1, "write LOGREC_UNDO_KEY_DELETE"); } int4store(long_tr_id, i); if (translog_write_record(&lsn, @@ -255,8 +267,10 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); exit(1); } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); lsn_base= lsn; @@ -267,8 +281,10 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); + ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD"); exit(1); } + ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD"); } translog_destroy(); @@ -320,6 +336,7 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } + ok(1, "read record"); translog_free_record_header(&rec); lsn= first_lsn; if (translog_init_scanner(first_lsn, 1, &scanner)) @@ -329,8 +346,6 @@ int main(int argc, char *argv[]) } for (i= 1;; i++) { - if (i % SHOW_DIVIDER == 0) - printf("read %d\n", i); len= translog_read_next_record_header(&scanner, &rec); if (len == 0) { @@ -406,6 +421,7 @@ int main(int argc, char *argv[]) goto err; } } + ok(1, "read record"); translog_free_record_header(&rec); len= translog_read_next_record_header(&scanner, &rec); @@ -500,6 +516,7 @@ int main(int argc, char *argv[]) goto err; } } + ok(1, "read record"); translog_free_record_header(&rec); len= translog_read_next_record_header(&scanner, &rec); @@ -533,6 +550,7 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } + ok(1, "read record"); translog_free_record_header(&rec); lsn= rec.lsn; @@ -563,14 +581,20 @@ int main(int argc, char *argv[]) translog_free_record_header(&rec); goto err; } + ok(1, "read record"); + translog_free_record_header(&rec); } } rc= 0; err: + if (rc) + ok(0, "read record"); translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); + if (maria_log_remove()) + exit(1); - return (test(exit_status() || rc)); + return (test(exit_status())); } diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index eb64d15868c..78f526aa3cf 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -2,6 +2,7 @@ #include #include #include +extern my_bool maria_log_remove(); #ifndef DBUG_OFF static const char *default_dbug_option; @@ -33,7 +34,7 @@ static byte *long_buffer; Get pseudo-random length of the field in limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE] - SYNOPSYS + SYNOPSIS get_len() RETURN @@ -101,8 +102,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, { int res= 0; translog_size_t len; - DBUG_ENTER("read_and_check_content"); - DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2); + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL)) != rec->record_length) { @@ -111,7 +111,7 @@ static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, res= 1; } res|= check_content(buffer + skip, rec->record_length - skip); - DBUG_RETURN(res); + return(res); } void writer(int num) @@ -119,7 +119,6 @@ void writer(int num) LSN lsn; byte long_tr_id[6]; uint i; - DBUG_ENTER("writer"); for (i= 0; i < ITERATIONS; i++) { @@ -135,6 +134,9 @@ void writer(int num) fprintf(stderr, "Can't write LOGREC_LONG_TRANSACTION_ID record #%lu " "thread %i\n", (ulong) i, num); translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); return; } lsns1[num][i]= lsn; @@ -144,25 +146,17 @@ void writer(int num) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); return; } lsns2[num][i]= lsn; - DBUG_PRINT("info", ("thread: %u, iteration: %u, len: %lu, " - "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)", - num, i, (ulong) lens[num][i], - (ulong) LSN_FILE_NO(lsns1[num][i]), - (ulong) LSN_OFFSET(lsns1[num][i]), - (ulong) LSN_FILE_NO(lsns2[num][i]), - (ulong) LSN_OFFSET(lsns2[num][i]))); - printf("thread: %u, iteration: %u, len: %lu, " - "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)\n", - num, i, (ulong) lens[num][i], - (ulong) LSN_FILE_NO(lsns1[num][i]), - (ulong) LSN_OFFSET(lsns1[num][i]), - (ulong) LSN_FILE_NO(lsns2[num][i]), - (ulong) LSN_OFFSET(lsns2[num][i])); + pthread_mutex_lock(&LOCK_thread_count); + ok(1, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); } - DBUG_VOID_RETURN; + return; } @@ -171,20 +165,18 @@ static void *test_thread_writer(void *arg) int param= *((int*) arg); my_thread_init(); - DBUG_ENTER("test_writer"); - DBUG_PRINT("enter", ("param: %d", param)); writer(param); - DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); pthread_mutex_lock(&LOCK_thread_count); thread_count--; + ok(1, "writer finished"); /* just to show progress */ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ pthread_mutex_unlock(&LOCK_thread_count); free((gptr) arg); my_thread_end(); - DBUG_RETURN(0); + return(0); } @@ -201,6 +193,8 @@ int main(int argc, char **argv __attribute__ ((unused))) int *param, error; int rc; + plan(WRITERS + ITERATIONS * WRITERS * 3); + bzero(&pagecache, sizeof(pagecache)); maria_data_root= "."; long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); @@ -212,8 +206,10 @@ int main(int argc, char **argv __attribute__ ((unused))) for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) long_buffer[i]= (i & 0xFF); - MY_INIT(argv[0]); + if (maria_log_remove()) + exit(1); + #ifndef DBUG_OFF #if defined(__WIN__) @@ -228,8 +224,6 @@ int main(int argc, char **argv __attribute__ ((unused))) } #endif - DBUG_ENTER("main"); - DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); if ((error= pthread_cond_init(&COND_thread_count, NULL))) { @@ -327,7 +321,6 @@ int main(int argc, char **argv __attribute__ ((unused))) thread_count++; number_of_writers--; } - DBUG_PRINT("info", ("All threads are started")); pthread_mutex_unlock(&LOCK_thread_count); pthread_attr_destroy(&thr_attr); @@ -342,7 +335,6 @@ int main(int argc, char **argv __attribute__ ((unused))) } if ((error= pthread_mutex_unlock(&LOCK_thread_count))) fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error); - DBUG_PRINT("info", ("All threads ended")); /* Find last LSN and flush up to it (all our log) */ { @@ -352,11 +344,6 @@ int main(int argc, char **argv __attribute__ ((unused))) if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0) max= lsns2[i][ITERATIONS - 1]; } - DBUG_PRINT("info", ("first lsn: (%lu,0x%lx), max lsn: (%lu,0x%lx)", - (ulong) LSN_FILE_NO(first_lsn), - (ulong) LSN_OFFSET(first_lsn), - (ulong) LSN_FILE_NO(max), - (ulong) LSN_OFFSET(max))); translog_flush(max); } @@ -398,8 +385,6 @@ int main(int argc, char **argv __attribute__ ((unused))) } index= indeces[rec.short_trid] / 2; stage= indeces[rec.short_trid] % 2; - printf("read(%d) thread: %d, iteration %d, stage %d\n", - i, (uint) rec.short_trid, index, stage); if (stage == 0) { if (rec.type !=LOGREC_LONG_TRANSACTION_ID || @@ -459,6 +444,7 @@ int main(int argc, char **argv __attribute__ ((unused))) goto err; } } + ok(1, "record read"); translog_free_record_header(&rec); indeces[rec.short_trid]++; } @@ -466,9 +452,13 @@ int main(int argc, char **argv __attribute__ ((unused))) rc= 0; err: + if (rc) + ok(0, "record read"); translog_destroy(); end_pagecache(&pagecache, 1); ma_control_file_end(); + if (maria_log_remove()) + exit(1); - DBUG_RETURN(test(exit_status() || rc)); + return(exit_status()); } diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index fc9b34ff980..211eb6dea9e 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -3,6 +3,8 @@ #include #include +extern my_bool maria_log_remove(); + #ifndef DBUG_OFF static const char *default_dbug_option; #endif @@ -26,8 +28,12 @@ int main(int argc, char *argv[]) MY_INIT(argv[0]); + plan(1); + bzero(&pagecache, sizeof(pagecache)); maria_data_root= "."; + if (maria_log_remove()) + exit(1); /* be sure that we have no logs in the directory*/ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0))) my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); @@ -127,8 +133,10 @@ int main(int argc, char *argv[]) "incorrect initial size of %s: %ld instead of %ld\n", first_translog_file, (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2)); + ok(0, "log triggered"); exit(1); } + ok(1, "log triggered"); translog_destroy(); end_pagecache(&pagecache, 1); From 3f4bd8e8a7f905dc8574a83cbbd7829c34d7a811 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Feb 2007 16:03:04 +0200 Subject: [PATCH 91/95] Temporary disable unstable test --- mysql-test/t/disabled.def | 1 + 1 file changed, 1 insertion(+) diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def index 3b063841512..c7748318e51 100644 --- a/mysql-test/t/disabled.def +++ b/mysql-test/t/disabled.def @@ -40,3 +40,4 @@ flush2 : Bug#24805 Pushbuild can't handle test with --disable- maria : Until maria is fully functional ps_maria : Until maria is fully functional +rpl_ndb_dd_advance : unstable test disable till next merge From bce8576412a6db26304dd2d069fa24c1c5293e9f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Feb 2007 23:30:58 +0200 Subject: [PATCH 92/95] More postreview changes storage/maria/ma_loghandler.c: Cursor checks made as one function. --- storage/maria/ma_loghandler.c | 93 ++++++++++++----------------------- 1 file changed, 32 insertions(+), 61 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index dd12c73770b..07cf48acd60 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -359,6 +359,27 @@ typedef struct st_translog_validator_data const char *maria_data_root; +/* + Check cursor/buffer consistence + + SYNOPSIS + translog_check_cursor + cursor cursor which will be checked +*/ + +#ifndef DBUG_OFF +static void translog_check_cursor(struct st_buffer_cursor *cursor) +{ + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr - cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == + cursor->current_page_fill % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); +} +#endif + /* Get file name of the log by log number @@ -696,11 +717,7 @@ static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, (ulong) (cursor->ptr - cursor->buffer->buffer))); - DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr - cursor->buffer->buffer) == - cursor->buffer->size)); - DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); - DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); DBUG_VOID_RETURN; } @@ -814,12 +831,8 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, (ulong) cursor->buffer->size, (ulong) (cursor->ptr -cursor->buffer->buffer), (uint) cursor->current_page_fill, (uint) left)); - DBUG_ASSERT(cursor->ptr !=NULL); - DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == - cursor->current_page_fill % TRANSLOG_PAGE_SIZE); DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); - DBUG_ASSERT(LSN_OFFSET(cursor->buffer->offset) + - (cursor->ptr -cursor->buffer->buffer) == LSN_OFFSET(*horizon)); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); if (cursor->protected) { DBUG_PRINT("info", ("Already protected and finished")); @@ -843,10 +856,7 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, (ulong) (cursor->ptr - cursor->buffer->buffer))); - DBUG_ASSERT(cursor->chaser - || ((ulong) (cursor->ptr - cursor->buffer->buffer) == - cursor->buffer->size)); - DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); } if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) { @@ -1011,10 +1021,7 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, (ulong) (cursor->ptr - cursor->buffer->buffer))); - DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr - cursor->buffer->buffer) == - cursor->buffer->size)); - DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); DBUG_VOID_RETURN; } @@ -1890,7 +1897,7 @@ my_bool translog_init(const char *directory, highest XXXXXXXX & set logs_found TODO: check that last checkpoint within present log addresses space - find the log end + find the log end */ if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) { @@ -2042,13 +2049,7 @@ my_bool translog_init(const char *directory, (ulong) log_descriptor.bc.buffer->size, (ulong) (log_descriptor.bc.ptr - log_descriptor.bc. buffer->buffer))); - DBUG_ASSERT(log_descriptor.bc.chaser || - ((ulong) - (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == - log_descriptor.bc.buffer->size)); - DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == - log_descriptor.bc.buffer_no); - DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); } } DBUG_PRINT("info", ("Logs found: %d was recovered: %d", @@ -2332,11 +2333,7 @@ static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, cursor->chaser, (ulong) cursor->buffer->size, (ulong) (cursor->ptr - cursor->buffer->buffer))); - DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr - cursor->buffer->buffer) == - cursor->buffer->size)); - DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); - DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); DBUG_RETURN(0); } @@ -2430,17 +2427,7 @@ static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, (ulong) LSN_OFFSET(*horizon), (ulong) (LSN_OFFSET(cursor->buffer->offset) + cursor->buffer->size))); - /* - TODO: make one check function for the buffer/loghandler - */ - - DBUG_ASSERT(cursor->chaser || - ((ulong) (cursor->ptr - cursor->buffer->buffer) == - cursor->buffer->size)); - DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); - DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == - cursor->current_page_fill % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); DBUG_RETURN(0); } @@ -2698,9 +2685,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) translog_wait_for_buffer_free(new_buffer); min_offset= min(buffer_end_offset, file_end_offset); - /* - TODO: check is it ptr or size enough - */ + /* TODO: check is it ptr or size enough */ log_descriptor.bc.buffer->size+= min_offset; log_descriptor.bc.ptr+= min_offset; DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", @@ -2761,16 +2746,7 @@ static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) ("pointer moved to: (%lu, 0x%lx)", (ulong) LSN_FILE_NO(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); - DBUG_ASSERT(log_descriptor.bc.chaser || - ((ulong) (log_descriptor.bc.ptr - - log_descriptor.bc.buffer->buffer) - == log_descriptor.bc.buffer->size)); - DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == - log_descriptor.bc.buffer_no); - DBUG_ASSERT((log_descriptor.bc.ptr - log_descriptor.bc.buffer-> - buffer) %TRANSLOG_PAGE_SIZE == - log_descriptor.bc.current_page_fill % TRANSLOG_PAGE_SIZE); - DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); log_descriptor.bc.protected= 0; DBUG_RETURN(0); } @@ -5132,14 +5108,9 @@ static void translog_force_current_buffer_to_finish() new_buff_begunning+= log_descriptor.bc.buffer->size; /* increase offset */ DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); - DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) - % TRANSLOG_PAGE_SIZE == - log_descriptor.bc.current_page_fill % TRANSLOG_PAGE_SIZE); DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == LSN_FILE_NO(log_descriptor.bc.buffer->offset)); - DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + - (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) == - LSN_OFFSET(log_descriptor.horizon)); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); if (left != 0) { From 2eb27365dfdd82b8db9c93db0df0f244b5ff8e80 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Feb 2007 23:54:07 +0200 Subject: [PATCH 93/95] fixed C++ comment --- storage/maria/ma_loghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 07cf48acd60..1dc4e9171e3 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -2058,7 +2058,7 @@ my_bool translog_init(const char *directory, { /* Start new log system from scratch */ /* Used space */ - log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); // header page + log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */ /* Current logs file number in page cache */ if ((log_descriptor.log_file_num[0]= open_logfile_by_number_no_cache(1)) == -1 || From a3f9083bb4649530b210423e21589803c075bb9e Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 22 Feb 2007 10:19:52 +0200 Subject: [PATCH 94/95] removed pthread_attr_setstacksize (not really nead but incompatible with some platforms) --- .../maria/unittest/ma_test_loghandler_multithread-t.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index 78f526aa3cf..1834e720328 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -250,14 +250,7 @@ int main(int argc, char **argv __attribute__ ((unused))) error, errno); exit(1); } -#ifndef pthread_attr_setstacksize /* void return value */ - if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) - { - fprintf(stderr, "Got error: %d from pthread_attr_setstacksize " - "(errno: %d)\n", error, errno); - exit(1); - } -#endif + #ifdef HAVE_THR_SETCONCURRENCY VOID(thr_setconcurrency(2)); #endif From fdf847fb62a0fcdf0edf25d6c8654b19eaa9a9ad Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 27 Feb 2007 10:59:01 +0200 Subject: [PATCH 95/95] comments fixed storage/maria/ma_loghandler.c: The function comment fixed. storage/maria/ma_loghandler_lsn.h: Comment of LSN/TRANSLOG_ADDRESS type fixed --- storage/maria/ma_loghandler.c | 3 +-- storage/maria/ma_loghandler_lsn.h | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 1dc4e9171e3..1e9afc7571d 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -4679,8 +4679,6 @@ translog_read_record_header_from_buffer(byte *page, buff log record header buffer NOTE - - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed - correctly. - Some type of record can be read completely by this call - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative LSN can be translated to absolute one), some fields can be added @@ -4786,6 +4784,7 @@ translog_read_record_header_scan(TRANSLOG_SCANNER_DATA its NOTES. - in case of end of the log buff->lsn will be set to (CONTROL_FILE_IMPOSSIBLE_LSN) + RETURN 0 error TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 End of the log diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 9625f109864..1789d3ce61b 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -1,7 +1,12 @@ #ifndef _ma_loghandler_lsn_h #define _ma_loghandler_lsn_h -/* Transaction log record address (file_no is int24 on the disk) */ +/* + Transaction log record address: + file_no << 32 | offset + file_no is only 3 bytes so we can use signed integer to make + comparison more simple. +*/ typedef int64 TRANSLOG_ADDRESS; /*