From aa43e56b112b8afde201330af8a4fce2c89fed37 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 30 Mar 2006 14:20:54 +0200 Subject: [PATCH 01/36] ndb - bug#15695 bug#16447 bug#18612 For various reasone have a partitioned cluster been created This patch makes sure that when they connect 1) it's detected 2) shutdown is forced --- ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp | 66 ++++-- ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 19 +- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 283 +++++++++++++++++++++--- 3 files changed, 322 insertions(+), 46 deletions(-) diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 04761cb67a8..d017705395c 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -133,6 +133,9 @@ Cmvmi::~Cmvmi() { } +#ifdef ERROR_INSERT +NodeBitmask c_error_9000_nodes_mask; +#endif void Cmvmi::execNDB_TAMPER(Signal* signal) { @@ -390,21 +393,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) const Uint32 len = signal->getLength(); if(len == 2){ - globalTransporterRegistry.do_connect(tStartingNode); - globalTransporterRegistry.setIOState(tStartingNode, HaltIO); - //----------------------------------------------------- - // Report that the connection to the node is opened - //----------------------------------------------------- - signal->theData[0] = EventReport::CommunicationOpened; - signal->theData[1] = tStartingNode; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); - //----------------------------------------------------- +#ifdef ERROR_INSERT + if (! 
(ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode))) +#endif + { + globalTransporterRegistry.do_connect(tStartingNode); + globalTransporterRegistry.setIOState(tStartingNode, HaltIO); + + //----------------------------------------------------- + // Report that the connection to the node is opened + //----------------------------------------------------- + signal->theData[0] = EventReport::CommunicationOpened; + signal->theData[1] = tStartingNode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); + //----------------------------------------------------- + } } else { for(unsigned int i = 1; i < MAX_NODES; i++ ) { jam(); if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ jam(); + +#ifdef ERROR_INSERT + if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i)) + continue; +#endif + globalTransporterRegistry.do_connect(i); globalTransporterRegistry.setIOState(i, HaltIO); @@ -1010,7 +1025,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ + Uint32 arg = dumpState->args[0]; + if (arg == DumpStateOrd::CmvmiDumpConnections){ for(unsigned int i = 1; i < MAX_NODES; i++ ){ const char* nodeTypeStr = ""; switch(getNodeInfo(i).m_type){ @@ -1043,13 +1059,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ + if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getNoOfFree()); } - if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) + if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert) { if(signal->getLength() == 1) { @@ -1069,7 +1085,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { + if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) { unsigned 
i; Uint32 loopCount = dumpState->args[1]; const unsigned len0 = 11; @@ -1097,6 +1113,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); } +#ifdef ERROR_INSERT + if (arg == 9000) + { + SET_ERROR_INSERT_VALUE(9000); + for (Uint32 i = 1; igetLength(); i++) + c_error_9000_nodes_mask.set(signal->theData[i]); + } + + if (arg == 9001) + { + CLEAR_ERROR_INSERT_VALUE; + for (Uint32 i = 0; itheData[0] = 0; + signal->theData[1] = i; + EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2); + } + } + c_error_9000_nodes_mask.clear(); + } +#endif + #ifdef VM_TRACE #if 0 { diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index f6fafdae594..efcb8a30721 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -100,7 +100,12 @@ public: }; struct StartRecord { - void reset(){ m_startKey++; m_startNode = 0;} + void reset(){ + m_startKey++; + m_startNode = 0; + m_gsn = RNIL; + m_nodes.clearWaitingFor(); + } Uint32 m_startKey; Uint32 m_startNode; Uint64 m_startTimeout; @@ -112,6 +117,14 @@ public: NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes + + /** + * Nodes which we're checking for partitioned cluster + * + * i.e. 
nodes that connect to use, when we already have elected president + */ + NdbNodeBitmask c_cmregreq_nodes; + Uint32 c_maxDynamicId; // Records @@ -251,8 +264,10 @@ private: // Generated statement blocks void startphase1(Signal* signal); - void electionWon(); + void electionWon(Signal* signal); void cmInfoconf010Lab(Signal* signal); + bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + void apiHbHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal); void hbReceivedLab(Signal* signal); diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 70084e6b171..30e7f3f36a7 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -56,6 +56,33 @@ #define DEBUG_START3(signal, msg) #endif +/** + * c_start.m_gsn = GSN_CM_REGREQ + * Possible for all nodes + * c_start.m_nodes contains all nodes in config + * + * c_start.m_gsn = GSN_CM_NODEINFOREQ; + * Set when receiving CM_REGCONF + * State possible for starting node only (not in cluster) + * + * c_start.m_nodes contains all node in alive cluster that + * that has not replied to GSN_CM_NODEINFOREQ + * passed by president in GSN_CM_REGCONF + * + * c_start.m_gsn = GSN_CM_ADD + * Possible for president only + * Set when receiving and accepting CM_REGREQ (to include node) + * + * c_start.m_nodes contains all nodes in alive cluster + starting node + * that has not replied to GSN_CM_ADD + * by sending GSN_CM_ACKADD + * + * c_start.m_gsn = GSN_CM_NODEINFOCONF + * Possible for non presidents only + * c_start.m_nodes contains a node that has been accepted by president + * but has not connected to us yet + */ + // Signal entries and statement blocks /* 4 P R O G R A M */ /*******************************/ @@ -259,18 +286,24 @@ void Qmgr::execCONNECT_REP(Signal* signal) { jamEntry(); const Uint32 nodeId = signal->theData[0]; + + if (ERROR_INSERTED(931)) + { + jam(); + ndbout_c("Discarding CONNECT_REP(%d)", nodeId); + 
infoEvent("Discarding CONNECT_REP(%d)", nodeId); + return; + } + c_connectedNodes.set(nodeId); NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); switch(nodePtr.p->phase){ - case ZSTARTING: case ZRUNNING: + ndbrequire(!c_clusterNodes.get(nodeId)); + case ZSTARTING: jam(); - if(!c_start.m_nodes.isWaitingFor(nodeId)){ - jam(); - return; - } break; case ZPREPARE_FAIL: case ZFAIL_CLOSING: @@ -282,32 +315,64 @@ void Qmgr::execCONNECT_REP(Signal* signal) case ZAPI_INACTIVE: return; } - + + if (getNodeInfo(nodeId).getType() != NodeInfo::DB) + { + jam(); + return; + } + switch(c_start.m_gsn){ case GSN_CM_REGREQ: jam(); sendCmRegReq(signal, nodeId); + + /** + * We're waiting for CM_REGCONF c_start.m_nodes contains all configured + * nodes + */ + ndbrequire(nodePtr.p->phase == ZSTARTING); + ndbrequire(c_start.m_nodes.isWaitingFor(nodeId)); return; case GSN_CM_NODEINFOREQ: jam(); - sendCmNodeInfoReq(signal, nodeId, nodePtr.p); - return; - case GSN_CM_ADD:{ - jam(); - ndbrequire(getOwnNodeId() != cpresident); - c_start.m_nodes.clearWaitingFor(nodeId); - c_start.m_gsn = RNIL; - - NodeRecPtr addNodePtr; - addNodePtr.i = nodeId; - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - cmAddPrepare(signal, addNodePtr, nodePtr.p); + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZSTARTING); + sendCmNodeInfoReq(signal, nodeId, nodePtr.p); + return; + } return; + case GSN_CM_NODEINFOCONF:{ + jam(); + + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZRUNNING); + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + c_start.m_nodes.clearWaitingFor(nodeId); + c_start.m_gsn = RNIL; + + NodeRecPtr addNodePtr; + addNodePtr.i = nodeId; + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); + cmAddPrepare(signal, addNodePtr, nodePtr.p); + return; + } } default: - return; + (void)1; } + + ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId)); + 
ndbrequire(!c_cmregreq_nodes.get(nodeId)); + c_cmregreq_nodes.set(nodeId); + sendCmRegReq(signal, nodeId); + c_regReqReqSent--; return; }//Qmgr::execCONNECT_REP() @@ -601,22 +666,39 @@ void Qmgr::execCM_REGCONF(Signal* signal) jamEntry(); const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0]; + Uint32 presidentNodeId = cmRegConf->presidentNodeId; + + if (check_cmregreq_reply(signal, presidentNodeId, GSN_CM_REGCONF)) + { + jam(); + return; + } if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) { jam(); char buf[128]; - BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion); + BaseString::snprintf(buf,sizeof(buf), + "incompatible version own=0x%x other=0x%x, " + " shutting down", + NDB_VERSION, cmRegConf->presidentVersion); systemErrorLab(signal, __LINE__, buf); return; } - + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + ndbrequire(c_start.m_gsn == GSN_CM_REGREQ); + ndbrequire(myNodePtr.p->phase = ZSTARTING); + cpdistref = cmRegConf->presidentBlockRef; cpresident = cmRegConf->presidentNodeId; UintR TdynamicId = cmRegConf->dynamicId; c_maxDynamicId = TdynamicId; c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes); + myNodePtr.p->ndynamicId = TdynamicId; + /*--------------------------------------------------------------*/ // Send this as an EVENT REPORT to inform about hearing about // other NDB node proclaiming to be president. 
@@ -627,10 +709,6 @@ void Qmgr::execCM_REGCONF(Signal* signal) signal->theData[3] = TdynamicId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - myNodePtr.i = getOwnNodeId(); - ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); - myNodePtr.p->ndynamicId = TdynamicId; - for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); if (c_clusterNodes.get(nodePtr.i)){ @@ -653,6 +731,134 @@ void Qmgr::execCM_REGCONF(Signal* signal) return; }//Qmgr::execCM_REGCONF() +bool +Qmgr::check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) +{ + NodeRecPtr myNodePtr; + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + NodeRecPtr nodePtr; + nodePtr.i = nodeId; + ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); + + /** + * Try to decide if replying node + * knows who is president + */ + Uint32 president_reply = RNIL; + switch(gsn){ + case GSN_CM_REGREF:{ + jam(); + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + switch(ref->errorCode){ + case CmRegRef::ZBUSY: + case CmRegRef::ZBUSY_PRESIDENT: + case CmRegRef::ZBUSY_TO_PRES: + jam(); + /** + * Only president replies this + */ + ndbrequire(nodeId == ref->presidentCandidate); + president_reply = nodeId; + break; + case CmRegRef::ZNOT_PRESIDENT: + jam(); + president_reply = ref->presidentCandidate; + break; + case CmRegRef::ZNOT_IN_CFG: + case CmRegRef::ZNOT_DEAD: + case CmRegRef::ZELECTION: + // Neither of these replies give certain president knowledge + jam(); + } + break; + } + case GSN_CM_REGCONF: + jam(); + president_reply = nodeId; + break; + } + + char buf[256]; + switch(c_start.m_gsn){ + case GSN_CM_REGREQ: + jam(); + ndbrequire(c_start.m_nodes.isWaitingFor(nodeId)); + ndbrequire(c_cmregreq_nodes.isclear()); + ndbrequire(myNodePtr.p->phase == ZSTARTING); + return false; + case GSN_CM_NODEINFOREQ: + jam(); + + ndbrequire(myNodePtr.p->phase == ZSTARTING); + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + /** + * We're waiting for CM_NODEINFO + */ + if (gsn == 
GSN_CM_REGREF) + { + jam(); + return false; + } + + jam(); + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " received CM_REGCONF from %d" + " while waiting for GSN_CM_NODEINFOCONF." + " president=%d", + nodeId, cpresident); + goto die_direct; + } + + goto check_reply; + default: + case GSN_CM_NODEINFOCONF: + jam(); + ndbrequire(myNodePtr.p->phase == ZRUNNING); + goto check_reply; + } + +check_reply: + jam(); + c_cmregreq_nodes.clear(nodeId); + + if (gsn == GSN_CM_REGCONF) + { + jam(); + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " received CM_REGCONF" + " from %d I think president: %d", + nodeId, cpresident); + goto die_direct; + } + + if (president_reply != RNIL && president_reply != cpresident) + { + jam(); + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " received CM_REGREF from %d specifying president as" + " %d, president: %d", + nodeId, president_reply, cpresident); + goto die_direct; + } + + return false; + +die_direct: + ndbout_c(buf); + progError(__LINE__, + ERR_ARBIT_SHUTDOWN, + buf); + + ndbrequire(false); +} + void Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend(); @@ -685,13 +891,21 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ void Qmgr::execCM_REGREF(Signal* signal) { jamEntry(); - c_regReqReqRecv++; - // Ignore block reference in data[0] UintR TaddNodeno = signal->theData[1]; UintR TrefuseReason = signal->theData[2]; Uint32 candidate = signal->theData[3]; DEBUG_START3(signal, TrefuseReason); + + if (check_cmregreq_reply(signal, TaddNodeno, GSN_CM_REGREF)) + { + jam(); + return; + } + + c_regReqReqRecv++; + + // Ignore block reference in data[0] if(candidate != cpresidentCandidate){ jam(); @@ -779,7 +993,7 @@ void Qmgr::execCM_REGREF(Signal* signal) Uint64 now = 
NdbTick_CurrentMillisecond(); if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ jam(); - electionWon(); + electionWon(signal); sendSttorryLab(signal); /** @@ -793,7 +1007,7 @@ void Qmgr::execCM_REGREF(Signal* signal) }//Qmgr::execCM_REGREF() void -Qmgr::electionWon(){ +Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; cpresident = getOwnNodeId(); /* This node becomes president. */ myNodePtr.i = getOwnNodeId(); @@ -812,6 +1026,12 @@ Qmgr::electionWon(){ cpresidentAlive = ZTRUE; c_stopElectionTime = ~0; c_start.reset(); + + signal->theData[0] = EventReport::CM_REGCONF; + signal->theData[1] = getOwnNodeId(); + signal->theData[2] = cpresident; + signal->theData[3] = 1; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); } /* @@ -946,7 +1166,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){ ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD); c_start.m_nodes.clearWaitingFor(); c_start.m_nodes.setWaitingFor(nodePtr.i); - c_start.m_gsn = GSN_CM_ADD; + c_start.m_gsn = GSN_CM_NODEINFOCONF; #else warningEvent("Enabling communication to CM_ADD node %u state=%d", nodePtr.i, @@ -1847,7 +2067,8 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0]; const Uint32 nodeId = rep->nodeId; c_connectedNodes.clear(nodeId); - + c_cmregreq_nodes.clear(nodeId); + NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); From 96075f47f602c87d8db92e33c789013ca3d10c83 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 31 Mar 2006 11:39:35 +0200 Subject: [PATCH 02/36] ndb - bug#16447 correct return value in check_cm_cmregreq --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 10 ++++++++-- ndb/test/src/NdbRestarts.cpp | 3 +-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 30e7f3f36a7..991e60a3efd 100644 --- 
a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -848,10 +848,12 @@ check_reply: goto die_direct; } - return false; + return true; die_direct: ndbout_c(buf); + CRASH_INSERTION(932); + progError(__LINE__, ERR_ARBIT_SHUTDOWN, buf); @@ -2082,9 +2084,13 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) case ZFAIL_CLOSING: case ZAPI_ACTIVE: case ZAPI_INACTIVE: + { + char buf[100]; + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, ERR_SR_OTHERNODEFAILED, buf); ndbrequire(false); } - + } node_failed(signal, nodeId); }//DISCONNECT_REP diff --git a/ndb/test/src/NdbRestarts.cpp b/ndb/test/src/NdbRestarts.cpp index eea4af437c4..8465caaab48 100644 --- a/ndb/test/src/NdbRestarts.cpp +++ b/ndb/test/src/NdbRestarts.cpp @@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter, << ") secs " << endl; NdbSleep_SecSleep(seconds); - randomId = (rand() % _restarter.getNumDbNodes()); - nodeId = _restarter.getDbNodeId(randomId); + nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand()); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, From 9a12ddcf5e9ba084ae135ecb7842f2a243216e93 Mon Sep 17 00:00:00 2001 From: "lars@mysql.com" <> Date: Fri, 31 Mar 2006 11:48:08 +0200 Subject: [PATCH 03/36] BUG#18116: Changed of how mutex is handled for XA and rotating binlog --- sql/log.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/log.cc b/sql/log.cc index 85e8c4dae2f..ba02c9ba082 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -1882,7 +1882,11 @@ DBUG_skip_commit: rotate binlog, if necessary. 
*/ if (commit_event->get_type_code() == XID_EVENT) - thread_safe_increment(prepared_xids, &LOCK_prep_xids); + { + pthread_mutex_lock(&LOCK_prep_xids); + prepared_xids++; + pthread_mutex_unlock(&LOCK_prep_xids); + } else rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); } From 6780538b261059b7e95511e4975b1149e702bf46 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 31 Mar 2006 16:36:43 +0200 Subject: [PATCH 04/36] ndb - add support for blocking/unblocking GCP using WAIT_GCP_REQ --- ndb/include/kernel/signaldata/WaitGCP.hpp | 7 ++++-- ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 28 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/ndb/include/kernel/signaldata/WaitGCP.hpp b/ndb/include/kernel/signaldata/WaitGCP.hpp index ebed28714d2..be2a5b9d5f0 100644 --- a/ndb/include/kernel/signaldata/WaitGCP.hpp +++ b/ndb/include/kernel/signaldata/WaitGCP.hpp @@ -46,7 +46,9 @@ public: Complete = 1, ///< Wait for a GCP to complete CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed CompleteIfRunning = 3, ///< Wait for ongoing GCP - CurrentGCI = 8 ///< Immediately return current GCI + CurrentGCI = 8, ///< Immediately return current GCI + BlockStartGcp = 9, + UnblockStartGcp = 10 }; Uint32 senderRef; @@ -70,11 +72,12 @@ class WaitGCPConf { //friend class Grep::PSCoord; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); public: Uint32 senderData; Uint32 gcp; + Uint32 blockStatus; }; class WaitGCPRef { diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index de35ce5c275..3bbf1c76644 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -14160,11 +14160,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, 
JBB); return; }//if + if (requestType == WaitGCPReq::BlockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 1; + return; + } + + if (requestType == WaitGCPReq::UnblockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 0; + return; + } + if(isMaster()) { /** * Master @@ -14176,6 +14201,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = coldgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); return; @@ -14262,6 +14288,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal) conf->senderData = ptr.p->clientData; conf->gcp = gcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); @@ -14329,6 +14356,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal) c_waitGCPMasterList.next(ptr); conf->senderData = clientData; + conf->blockStatus = cgcpOrderBlocked; sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); From bde890effd37961e5e42498a15dedba1f0fc7998 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 31 Mar 2006 16:46:28 +0200 Subject: [PATCH 05/36] ndb - bug#18612 (detection of partitioned cluster) this also impl. 
gcp safe multi node shutdown 1) block gcp 2) wait for ongoing gcp 3) inform all stopping QMGR's (so that they don't start with error handler) 4) wait for all QMGR's to reply 5) broadcast failrep for stopping nodes 6) (if !master died) unblock gcp --- .../kernel/signaldata/DumpStateOrd.hpp | 1 + ndb/include/kernel/signaldata/FailRep.hpp | 6 +- ndb/include/kernel/signaldata/StopReq.hpp | 44 ++- ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp | 11 + ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp | 1 + ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 351 +++++++++++++++--- ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 6 +- ndb/src/kernel/blocks/qmgr/QmgrInit.cpp | 2 + ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 46 ++- ndb/test/ndbapi/testNodeRestart.cpp | 112 +++++- 10 files changed, 506 insertions(+), 74 deletions(-) diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp index b42b930711c..a2993ad5d03 100644 --- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -64,6 +64,7 @@ public: // 19 NDBFS Fipple with O_SYNC, O_CREATE etc. 
// 20-24 BACKUP NdbcntrTestStopOnError = 25, + NdbcntrStopNodes = 70, // 100-105 TUP and ACC // 200-240 UTIL // 300-305 TRIX diff --git a/ndb/include/kernel/signaldata/FailRep.hpp b/ndb/include/kernel/signaldata/FailRep.hpp index 44577f07fdc..b1c16294e70 100644 --- a/ndb/include/kernel/signaldata/FailRep.hpp +++ b/ndb/include/kernel/signaldata/FailRep.hpp @@ -27,6 +27,7 @@ class FailRep { * Sender(s) & Reciver(s) */ friend class Qmgr; + friend class Ndbcntr; /** * For printing @@ -43,9 +44,10 @@ public: ZSTART_IN_REGREQ=3, ZHEARTBEAT_FAILURE=4, ZLINK_FAILURE=5, - ZOTHERNODE_FAILED_DURING_START=6 + ZOTHERNODE_FAILED_DURING_START=6, + ZMULTI_NODE_SHUTDOWN = 7 }; - + private: Uint32 failNodeId; diff --git a/ndb/include/kernel/signaldata/StopReq.hpp b/ndb/include/kernel/signaldata/StopReq.hpp index 8e6a0b90a91..8a9fde75b6c 100644 --- a/ndb/include/kernel/signaldata/StopReq.hpp +++ b/ndb/include/kernel/signaldata/StopReq.hpp @@ -32,7 +32,7 @@ class StopReq friend class MgmtSrvr; public: - STATIC_CONST( SignalLength = 9 ); + STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size); public: Uint32 senderRef; @@ -49,29 +49,34 @@ public: Int32 readOperationTimeout; // Timeout before read operations are aborted Int32 operationTimeout; // Timeout before all operations are aborted + Uint32 nodes[NdbNodeBitmask::Size]; + static void setSystemStop(Uint32 & requestInfo, bool value); static void setPerformRestart(Uint32 & requestInfo, bool value); static void setNoStart(Uint32 & requestInfo, bool value); static void setInitialStart(Uint32 & requestInfo, bool value); - static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value); /** * Don't perform "graceful" shutdown/restart... 
*/ static void setStopAbort(Uint32 & requestInfo, bool value); + static void setStopNodes(Uint32 & requestInfo, bool value); static bool getSystemStop(const Uint32 & requestInfo); static bool getPerformRestart(const Uint32 & requestInfo); static bool getNoStart(const Uint32 & requestInfo); static bool getInitialStart(const Uint32 & requestInfo); - static bool getEscalateOnNodeFail(const Uint32 & requestInfo); static bool getStopAbort(const Uint32 & requestInfo); + static bool getStopNodes(const Uint32 & requestInfo); }; struct StopConf { STATIC_CONST( SignalLength = 2 ); Uint32 senderData; - Uint32 nodeState; + union { + Uint32 nodeState; + Uint32 nodeId; + }; }; class StopRef @@ -94,7 +99,9 @@ public: NodeShutdownInProgress = 1, SystemShutdownInProgress = 2, NodeShutdownWouldCauseSystemCrash = 3, - TransactionAbortFailed = 4 + TransactionAbortFailed = 4, + UnsupportedNodeShutdown = 5, + MultiNodeShutdownNotMaster = 6 }; public: @@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo) inline bool -StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo) +StopReq::getStopAbort(const Uint32 & requestInfo) { - return requestInfo & 16; + return requestInfo & 32; } inline bool -StopReq::getStopAbort(const Uint32 & requestInfo) +StopReq::getStopNodes(const Uint32 & requestInfo) { - return requestInfo & 32; + return requestInfo & 64; } @@ -185,16 +192,6 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value) requestInfo &= ~8; } -inline -void -StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value) -{ - if(value) - requestInfo |= 16; - else - requestInfo &= ~16; -} - inline void StopReq::setStopAbort(Uint32 & requestInfo, bool value) @@ -205,6 +202,15 @@ StopReq::setStopAbort(Uint32 & requestInfo, bool value) requestInfo &= ~32; } +inline +void +StopReq::setStopNodes(Uint32 & requestInfo, bool value) +{ + if(value) + requestInfo |= 64; + else + requestInfo &= ~64; +} #endif diff --git a/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp 
b/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp index 657133bda36..ae40a7c4581 100644 --- a/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp +++ b/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp @@ -202,6 +202,7 @@ private: void execWAIT_GCP_CONF(Signal* signal); void execSTOP_REQ(Signal* signal); + void execSTOP_CONF(Signal* signal); void execRESUME_REQ(Signal* signal); void execCHANGE_NODE_STATE_CONF(Signal* signal); @@ -337,6 +338,16 @@ public: void progError(int line, int cause, const char * extra) { cntr.progError(line, cause, extra); } + + enum StopNodesStep { + SR_BLOCK_GCP_START_GCP = 0, + SR_WAIT_COMPLETE_GCP = 1, + SR_UNBLOCK_GCP_START_GCP = 2, + SR_QMGR_STOP_REQ = 3, + SR_WAIT_NODE_FAILURES = 4, + SR_CLUSTER_SHUTDOWN = 12 + } m_state; + SignalCounter m_stop_req_counter; }; private: StopRecord c_stopRec; diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp index 97ca3f44b3a..cb20fb2ca22 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp @@ -86,6 +86,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf): addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF); addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ); + addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF); addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ); addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF); diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index e3ec1f9723e..5a841d6f836 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -42,6 +42,8 @@ #include #include +#include + #include #include #include @@ -1454,13 +1456,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal, NodeFailRep::SignalLength, JBB); + if (c_stopRec.stopReq.senderRef) + { + jam(); + switch(c_stopRec.m_state){ + case 
StopRecord::SR_WAIT_NODE_FAILURES: + { + jam(); + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + if (tmp.isclear()) + { + jam(); + if (c_stopRec.stopReq.senderRef != RNIL) + { + jam(); + StopConf * const stopConf = (StopConf *)&signal->theData[0]; + stopConf->senderData = c_stopRec.stopReq.senderData; + stopConf->nodeState = (Uint32) NodeState::SL_SINGLEUSER; + sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_CONF, signal, + StopConf::SignalLength, JBB); + } + + c_stopRec.stopReq.senderRef = 0; + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBA); + } + break; + } + case StopRecord::SR_QMGR_STOP_REQ: + { + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + + if (tmp.isclear()) + { + Uint32 nodeId = allFailed.find(0); + tmp.set(nodeId); + + StopConf* conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopRec.stopReq.senderData; + conf->nodeId = nodeId; + sendSignal(reference(), + GSN_STOP_CONF, signal, StopConf::SignalLength, JBB); + } + + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + break; + } + } + } + + signal->theData[0] = EventReport::NODE_FAILREP; + signal->theData[2] = 0; + Uint32 nodeId = 0; while(!allFailed.isclear()){ nodeId = allFailed.find(nodeId + 1); allFailed.clear(nodeId); - signal->theData[0] = EventReport::NODE_FAILREP; signal->theData[1] = nodeId; - signal->theData[2] = 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); }//for @@ -1908,13 +1971,15 @@ void Ndbcntr::execDUMP_STATE_ORD(Signal* signal) { DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if(signal->theData[0] == 13){ + 
Uint32 arg = dumpState->args[0]; + + if(arg == 13){ infoEvent("Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d", cstartPhase, cinternalStartphase, cndbBlocksCount); infoEvent("Cntr: cmasterNodeId = %d", cmasterNodeId); } - if (dumpState->args[0] == DumpStateOrd::NdbcntrTestStopOnError){ + if (arg == DumpStateOrd::NdbcntrTestStopOnError){ if (theConfiguration.stopOnError() == true) ((Configuration&)theConfiguration).stopOnError(false); @@ -1927,6 +1992,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal) SystemError::SignalLength, JBA); } + if (arg == DumpStateOrd::NdbcntrStopNodes) + { + NdbNodeBitmask mask; + for(Uint32 i = 1; igetLength(); i++) + mask.set(signal->theData[i]); + + StopReq* req = (StopReq*)signal->getDataPtrSend(); + req->senderRef = RNIL; + req->senderData = 123; + req->requestInfo = 0; + req->singleuser = 0; + req->singleUserApi = 0; + mask.copyto(NdbNodeBitmask::Size, req->nodes); + StopReq::setPerformRestart(req->requestInfo, 1); + StopReq::setNoStart(req->requestInfo, 1); + StopReq::setStopNodes(req->requestInfo, 1); + StopReq::setStopAbort(req->requestInfo, 1); + + sendSignal(reference(), GSN_STOP_REQ, signal, + StopReq::SignalLength, JBB); + return; + } }//Ndbcntr::execDUMP_STATE_ORD() @@ -1987,9 +2074,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ Uint32 senderData = req->senderData; BlockReference senderRef = req->senderRef; bool abort = StopReq::getStopAbort(req->requestInfo); + bool stopnodes = StopReq::getStopNodes(req->requestInfo); - if(getNodeState().startLevel < NodeState::SL_STARTED || - abort && !singleuser){ + if(!singleuser && + (getNodeState().startLevel < NodeState::SL_STARTED || + (abort && !stopnodes))) + { /** * Node is not started yet * @@ -2028,21 +2118,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; - sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + + if (senderRef != RNIL) + sendSignal(senderRef, 
GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && !abort) + { + jam(); + ref->errorCode = StopRef::UnsupportedNodeShutdown; + ref->senderData = senderData; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && cmasterNodeId != getOwnNodeId()) + { + jam(); + ref->errorCode = StopRef::MultiNodeShutdownNotMaster; + ref->senderData = senderData; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; } c_stopRec.stopReq = * req; c_stopRec.stopInitiatedTime = NdbTick_CurrentMillisecond(); - if(!singleuser) { - if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) { + if (stopnodes) + { + jam(); + + if(!c_stopRec.checkNodeFail(signal)) + { jam(); - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + return; + } + + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Initiating shutdown abort of %s", mask.getText(buf)); + ndbout_c("Initiating shutdown abort of %s", mask.getText(buf)); + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_BLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::BlockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + else if(!singleuser) + { + if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) + { + jam(); + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)) + { ((Configuration&)theConfiguration).stopOnError(false); } } - if(!c_stopRec.checkNodeFail(signal)){ + if(!c_stopRec.checkNodeFail(signal)) + { jam(); return; } @@ -2112,7 +2252,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ */ NodeBitmask ndbMask; ndbMask.assign(cntr.c_startedNodes); - ndbMask.clear(cntr.getOwnNodeId()); + + if (StopReq::getStopNodes(stopReq.requestInfo)) + { + 
NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, stopReq.nodes); + ndbMask.bitANDC(tmp); + } + else + { + ndbMask.clear(cntr.getOwnNodeId()); + } CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; sd->blockRef = cntr.reference(); @@ -2134,7 +2284,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; const BlockReference bref = stopReq.senderRef; - cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + if (bref != RNIL) + cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); stopReq.senderRef = 0; @@ -2184,23 +2335,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){ if(stopReq.getSystemStop(stopReq.requestInfo) || stopReq.singleuser){ jam(); if(stopReq.singleuser) - { - jam(); - AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, - AbortAllReq::SignalLength, JBB); - } + { + jam(); + AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = 12; + cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, + AbortAllReq::SignalLength, JBB); + } else - { - WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - req->requestType = WaitGCPReq::CompleteForceStart; - cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, - WaitGCPReq::SignalLength, JBB); - } + { + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; + req->requestType = WaitGCPReq::CompleteForceStart; + cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + } } else { jam(); StopPermReq * req = (StopPermReq*)&signal->theData[0]; @@ -2362,7 +2513,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ WaitGCPReq * req = 
(WaitGCPReq*)&signal->theData[0]; req->senderRef = reference(); - req->senderData = 12; + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; req->requestType = WaitGCPReq::CompleteForceStart; sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, WaitGCPReq::SignalLength, JBB); @@ -2371,29 +2522,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ void Ndbcntr::execWAIT_GCP_CONF(Signal* signal){ jamEntry(); - ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); - NodeState newState(NodeState::SL_STOPPING_3, true); + WaitGCPConf* conf = (WaitGCPConf*)signal->getDataPtr(); - /** - * Inform QMGR so that arbitrator won't kill us - */ - NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; - rep->nodeState = newState; - rep->nodeState.masterNodeId = cmasterNodeId; - rep->nodeState.setNodeGroup(c_nodeGroup); - EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, NodeStateRep::SignalLength); + switch(conf->senderData){ + case StopRecord::SR_BLOCK_GCP_START_GCP: + { + jam(); + /** + * + */ + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_WAIT_COMPLETE_GCP; + req->requestType = WaitGCPReq::CompleteIfRunning; - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ - jam(); - StartOrd * startOrd = (StartOrd *)&signal->theData[0]; - startOrd->restartInfo = c_stopRec.stopReq.requestInfo; - sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, - StartOrd::SignalLength); - } else { - jam(); - sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + case StopRecord::SR_UNBLOCK_GCP_START_GCP: + { + jam(); + return; + } + case StopRecord::SR_WAIT_COMPLETE_GCP: + { + jam(); + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, 
c_stopRec.stopReq.nodes); + c_stopRec.m_stop_req_counter = tmp; + NodeReceiverGroup rg(QMGR, tmp); + StopReq * stopReq = (StopReq *)&signal->theData[0]; + * stopReq = c_stopRec.stopReq; + stopReq->senderRef = reference(); + sendSignal(rg, GSN_STOP_REQ, signal, StopReq::SignalLength, JBA); + c_stopRec.m_state = StopRecord::SR_QMGR_STOP_REQ; + return; + } + case StopRecord::SR_CLUSTER_SHUTDOWN: + { + jam(); + break; + } + } + + { + ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); + NodeState newState(NodeState::SL_STOPPING_3, true); + + /** + * Inform QMGR so that arbitrator won't kill us + */ + NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; + rep->nodeState = newState; + rep->nodeState.masterNodeId = cmasterNodeId; + rep->nodeState.setNodeGroup(c_nodeGroup); + EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, + NodeStateRep::SignalLength); + + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + startOrd->restartInfo = c_stopRec.stopReq.requestInfo; + sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, + StartOrd::SignalLength); + } else { + jam(); + sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + } + return; + } + +unblock: + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); +} + +void +Ndbcntr::execSTOP_CONF(Signal* signal) +{ + jamEntry(); + StopConf *conf = (StopConf*)signal->getDataPtr(); + ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ); + c_stopRec.m_stop_req_counter.clearWaitingFor(conf->nodeId); + if (c_stopRec.m_stop_req_counter.done()) + { + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Stopping of %s", mask.getText(buf)); + 
ndbout_c("Stopping of %s", mask.getText(buf)); + + /** + * Kill any node... + */ + FailRep * const failRep = (FailRep *)&signal->theData[0]; + failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN; + NodeReceiverGroup rg(QMGR, c_clusterNodes); + Uint32 nodeId = 0; + while ((nodeId = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, nodeId+1)) + != NdbNodeBitmask::NotFound) + { + failRep->failNodeId = nodeId; + sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES; + return; } - return; } void Ndbcntr::execSTTORRY(Signal* signal){ diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index efcb8a30721..3b623b36206 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "timer.hpp" @@ -218,6 +219,7 @@ private: void execPRES_TOCONF(Signal* signal); void execDISCONNECT_REP(Signal* signal); void execSYSTEM_ERROR(Signal* signal); + void execSTOP_REQ(Signal* signal); // Received signals void execDUMP_STATE_ORD(Signal* signal); @@ -402,7 +404,9 @@ private: Uint16 cfailedNodes[MAX_NDB_NODES]; Uint16 cprepFailedNodes[MAX_NDB_NODES]; Uint16 ccommitFailedNodes[MAX_NDB_NODES]; - + + StopReq c_stopReq; + void check_multi_node_shutdown(Signal* signal); }; #endif diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index 43d8f0971ed..ade880b7e4a 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -35,6 +35,7 @@ void Qmgr::initData() Uint32 hbDBAPI = 500; setHbApiDelay(hbDBAPI); + c_stopReq.senderRef = 0; }//Qmgr::initData() void Qmgr::initRecords() @@ -49,6 +50,7 @@ Qmgr::Qmgr(const class Configuration & conf) // Transit signals addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD); + addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ); addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG); 
addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB); addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT); diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 991e60a3efd..03f6fa2ae87 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2342,6 +2342,9 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, failedNodePtr.i = aFailedNode; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + + check_multi_node_shutdown(signal); + if (failedNodePtr.i == getOwnNodeId()) { jam(); @@ -2433,7 +2436,9 @@ void Qmgr::execPREP_FAILREQ(Signal* signal) { NodeRecPtr myNodePtr; jamEntry(); - + + check_multi_node_shutdown(signal); + PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0]; BlockReference Tblockref = prepFail->xxxBlockRef; @@ -4085,6 +4090,8 @@ Qmgr::stateArbitCrash(Signal* signal) if (! (arbitRec.getTimediff() > getArbitTimeout())) return; #endif + CRASH_INSERTION(932); + progError(__LINE__, ERR_ARBIT_SHUTDOWN, "Arbitrator decided to shutdown this node"); } @@ -4245,3 +4252,40 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal) NodeReceiverGroup rg(API_CLUSTERMGR, mask); sendSignal(rg, api.gsn, signal, len, JBB); // forward sections } + +void +Qmgr::execSTOP_REQ(Signal* signal) +{ + jamEntry(); + c_stopReq = * (StopReq*)signal->getDataPtr(); + + if (c_stopReq.senderRef) + { + ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())); + + StopConf *conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopReq.senderData; + conf->nodeState = getOwnNodeId(); + sendSignal(c_stopReq.senderRef, + GSN_STOP_CONF, signal, StopConf::SignalLength, JBA); + } +} + +void +Qmgr::check_multi_node_shutdown(Signal* signal) +{ + if (c_stopReq.senderRef && + NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())) + { + jam(); + if(StopReq::getPerformRestart(c_stopReq.requestInfo)) + { + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + 
startOrd->restartInfo = c_stopReq.requestInfo; + EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2); + } else { + EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1); + } + } +} diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 365d6e3ed6e..5f577b77f34 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -22,7 +22,7 @@ #include #include #include - +#include int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -669,6 +669,110 @@ err: return NDBT_FAILED; } +int +runBug18612(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i Date: Fri, 31 Mar 2006 18:53:07 +0200 Subject: [PATCH 06/36] ndb - autotest add new testpgrom for bug#18612 to autotest --- ndb/test/run-test/daily-basic-tests.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index ce5462d11c9..1e9bad1b969 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -458,10 +458,14 @@ args: -n Bug16772 T1 #cmd: testSystemRestart #args: -n Bug18385 T1 # -max-time: 500 +max-time: 1000 cmd: testNodeRestart args: -n Bug18414 T1 +max-time: 500 +cmd: testNodeRestart +args: -n Bug18612 T1 + # OLD FLEX max-time: 500 cmd: flexBench From 1aa9a95065cad59795076c17fe35edfd6f86deef Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 3 Apr 2006 11:26:29 +0200 Subject: [PATCH 07/36] ndb - bug#18612 post weeked fixes :-) change impl. 
to use READ_NODESREQ to query state of other qmgr(partition) this as it has no (current) side effects, so that it's possible only to kill starting cluster (if one started and one starting) --- ndb/include/kernel/signaldata/FailRep.hpp | 11 +- ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 7 +- ndb/src/kernel/blocks/qmgr/QmgrInit.cpp | 3 + ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 230 ++++++++++------------ ndb/test/ndbapi/testNodeRestart.cpp | 4 +- 5 files changed, 126 insertions(+), 129 deletions(-) diff --git a/ndb/include/kernel/signaldata/FailRep.hpp b/ndb/include/kernel/signaldata/FailRep.hpp index b1c16294e70..f575d99e865 100644 --- a/ndb/include/kernel/signaldata/FailRep.hpp +++ b/ndb/include/kernel/signaldata/FailRep.hpp @@ -36,7 +36,8 @@ class FailRep { public: STATIC_CONST( SignalLength = 2 ); - + STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size ); + enum FailCause { ZOWN_FAILURE=0, ZOTHER_NODE_WHEN_WE_START=1, @@ -45,13 +46,19 @@ public: ZHEARTBEAT_FAILURE=4, ZLINK_FAILURE=5, ZOTHERNODE_FAILED_DURING_START=6, - ZMULTI_NODE_SHUTDOWN = 7 + ZMULTI_NODE_SHUTDOWN = 7, + ZPARTITIONED_CLUSTER = 8 }; private: Uint32 failNodeId; Uint32 failCause; + /** + * Used when failCause == ZPARTITIONED_CLUSTER + */ + Uint32 president; + Uint32 partition[NdbNodeBitmask::Size]; }; diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 3b623b36206..07e6a2a10c1 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -124,7 +124,7 @@ public: * * i.e. 
nodes that connect to use, when we already have elected president */ - NdbNodeBitmask c_cmregreq_nodes; + NdbNodeBitmask c_readnodes_nodes; Uint32 c_maxDynamicId; @@ -233,6 +233,8 @@ private: void execREAD_NODESREQ(Signal* signal); void execSET_VAR_REQ(Signal* signal); + void execREAD_NODESREF(Signal* signal); + void execREAD_NODESCONF(Signal* signal); void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -249,6 +251,8 @@ private: void execARBIT_STOPREP(Signal* signal); // Statement blocks + void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); void failReport(Signal* signal, @@ -268,7 +272,6 @@ private: void startphase1(Signal* signal); void electionWon(Signal* signal); void cmInfoconf010Lab(Signal* signal); - bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); void apiHbHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal); diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index ade880b7e4a..a8fe30d8cfa 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -94,6 +94,9 @@ Qmgr::Qmgr(const class Configuration & conf) addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF); addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP); + addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); + addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + initData(); }//Qmgr::Qmgr() diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 03f6fa2ae87..c17922dff48 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -369,13 +369,29 @@ void Qmgr::execCONNECT_REP(Signal* signal) } ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId)); - ndbrequire(!c_cmregreq_nodes.get(nodeId)); - 
c_cmregreq_nodes.set(nodeId); - sendCmRegReq(signal, nodeId); - c_regReqReqSent--; + ndbrequire(!c_readnodes_nodes.get(nodeId)); + c_readnodes_nodes.set(nodeId); + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); return; }//Qmgr::execCONNECT_REP() +void +Qmgr::execREAD_NODESCONF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESCONF); +} + +void +Qmgr::execREAD_NODESREF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESREF); +} + /*******************************/ /* CM_INFOCONF */ /*******************************/ @@ -668,12 +684,6 @@ void Qmgr::execCM_REGCONF(Signal* signal) const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0]; Uint32 presidentNodeId = cmRegConf->presidentNodeId; - if (check_cmregreq_reply(signal, presidentNodeId, GSN_CM_REGCONF)) - { - jam(); - return; - } - if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) { jam(); char buf[128]; @@ -731,8 +741,8 @@ void Qmgr::execCM_REGCONF(Signal* signal) return; }//Qmgr::execCM_REGCONF() -bool -Qmgr::check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) +void +Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) { NodeRecPtr myNodePtr; myNodePtr.i = getOwnNodeId(); @@ -741,117 +751,65 @@ Qmgr::check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) NodeRecPtr nodePtr; nodePtr.i = nodeId; ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); - - /** - * Try to decide if replying node - * knows who is president - */ - Uint32 president_reply = RNIL; - switch(gsn){ - case GSN_CM_REGREF:{ - jam(); - CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); - switch(ref->errorCode){ - case CmRegRef::ZBUSY: - case CmRegRef::ZBUSY_PRESIDENT: - case CmRegRef::ZBUSY_TO_PRES: - jam(); - /** - * Only president replies this - */ - ndbrequire(nodeId == ref->presidentCandidate); - 
president_reply = nodeId; - break; - case CmRegRef::ZNOT_PRESIDENT: - jam(); - president_reply = ref->presidentCandidate; - break; - case CmRegRef::ZNOT_IN_CFG: - case CmRegRef::ZNOT_DEAD: - case CmRegRef::ZELECTION: - // Neither of these replies give certain president knowledge - jam(); - } - break; - } - case GSN_CM_REGCONF: - jam(); - president_reply = nodeId; - break; - } - - char buf[256]; - switch(c_start.m_gsn){ - case GSN_CM_REGREQ: - jam(); - ndbrequire(c_start.m_nodes.isWaitingFor(nodeId)); - ndbrequire(c_cmregreq_nodes.isclear()); - ndbrequire(myNodePtr.p->phase == ZSTARTING); - return false; - case GSN_CM_NODEINFOREQ: - jam(); - ndbrequire(myNodePtr.p->phase == ZSTARTING); - if (c_start.m_nodes.isWaitingFor(nodeId)) - { - jam(); - /** - * We're waiting for CM_NODEINFO - */ - if (gsn == GSN_CM_REGREF) - { - jam(); - return false; - } - - jam(); - BaseString::snprintf(buf, sizeof(buf), - "Partitioned cluster! check StartPartialTimeout, " - " received CM_REGCONF from %d" - " while waiting for GSN_CM_NODEINFOCONF." - " president=%d", - nodeId, cpresident); - goto die_direct; - } - - goto check_reply; - default: - case GSN_CM_NODEINFOCONF: - jam(); - ndbrequire(myNodePtr.p->phase == ZRUNNING); - goto check_reply; - } - -check_reply: - jam(); - c_cmregreq_nodes.clear(nodeId); - - if (gsn == GSN_CM_REGCONF) + ndbrequire(c_readnodes_nodes.get(nodeId)); + ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr(); + if (gsn == GSN_READ_NODESREF) { jam(); - BaseString::snprintf(buf, sizeof(buf), - "Partitioned cluster! check StartPartialTimeout, " - " received CM_REGCONF" - " from %d I think president: %d", - nodeId, cpresident); - goto die_direct; +retry: + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); + return; } - if (president_reply != RNIL && president_reply != cpresident) + if (conf->masterNodeId == ZNIL) { jam(); - BaseString::snprintf(buf, sizeof(buf), - "Partitioned cluster! 
check StartPartialTimeout, " - " received CM_REGREF from %d specifying president as" - " %d, president: %d", - nodeId, president_reply, cpresident); - goto die_direct; + goto retry; } - return true; + Uint32 president = conf->masterNodeId; + if (president == cpresident) + { + jam(); + c_readnodes_nodes.clear(nodeId); + return; + } + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " node %d thinks %d is president, " + " I think president is: %d", + nodeId, president, cpresident); -die_direct: ndbout_c(buf); + CRASH_INSERTION(933); + + if (getNodeState().startLevel == NodeState::SL_STARTED) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, conf->clusterNodes); + FailRep* rep = (FailRep*)signal->getDataPtrSend(); + rep->failCause = FailRep::ZPARTITIONED_CLUSTER; + rep->president = cpresident; + c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition); + Uint32 ref = calcQmgrBlockRef(nodeId); + Uint32 i = 0; + while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound) + { + if (i == nodeId) + continue; + rep->failNodeId = i; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + rep->failNodeId = nodeId; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBB); + return; + } + CRASH_INSERTION(932); progError(__LINE__, @@ -899,12 +857,6 @@ void Qmgr::execCM_REGREF(Signal* signal) Uint32 candidate = signal->theData[3]; DEBUG_START3(signal, TrefuseReason); - if (check_cmregreq_reply(signal, TaddNodeno, GSN_CM_REGREF)) - { - jam(); - return; - } - c_regReqReqRecv++; // Ignore block reference in data[0] @@ -2069,7 +2021,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0]; const Uint32 nodeId = rep->nodeId; c_connectedNodes.clear(nodeId); - c_cmregreq_nodes.clear(nodeId); + c_readnodes_nodes.clear(nodeId); NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); @@ -2342,13 +2294,16 @@ void 
Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, failedNodePtr.i = aFailedNode; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + FailRep* rep = (FailRep*)signal->getDataPtr(); check_multi_node_shutdown(signal); if (failedNodePtr.i == getOwnNodeId()) { jam(); + Uint32 code = 0; const char * msg = 0; + char extra[100]; switch(aFailCause){ case FailRep::ZOWN_FAILURE: msg = "Own failure"; @@ -2369,17 +2324,46 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, case FailRep::ZLINK_FAILURE: msg = "Connection failure"; break; + case FailRep::ZPARTITIONED_CLUSTER: + { + code = ERR_ARBIT_SHUTDOWN; + char buf1[100], buf2[100]; + c_clusterNodes.getText(buf1); + if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && + signal->header.theVerId_signalNumber == GSN_FAIL_REP) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, rep->partition); + part.getText(buf2); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s other cluster: %s", + buf1, buf2); + } + else + { + jam(); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s ", buf1); + } + msg = extra; + break; + } } - char buf[100]; - BaseString::snprintf(buf, 100, + CRASH_INSERTION(932); + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), "We(%u) have been declared dead by %u reason: %s(%u)", getOwnNodeId(), refToNode(signal->getSendersBlockRef()), aFailCause, msg ? 
msg : ""); - - progError(__LINE__, 0, buf); + + progError(__LINE__, code, buf); return; }//if diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 5f577b77f34..bdf0069aa26 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -753,13 +753,13 @@ runBug18612(NDBT_Context* ctx, NDBT_Step* step){ if (restarter.dumpStateAllNodes(dump, 2)) return NDBT_FAILED; - if (restarter.waitClusterNoStart()) + if (restarter.waitNodesNoStart(partition0, cnt/2)) return NDBT_FAILED; for (Uint32 i = 0; i Date: Mon, 3 Apr 2006 12:09:50 +0200 Subject: [PATCH 08/36] ndb - bug#18612 - partitioned startup add testprg for SR case aswell --- ndb/test/ndbapi/testNodeRestart.cpp | 96 +++++++++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 6 +- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index bdf0069aa26..d297527ac8b 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -772,6 +772,96 @@ runBug18612(NDBT_Context* ctx, NDBT_Step* step){ return NDBT_OK; } +int +runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i Date: Mon, 3 Apr 2006 13:12:23 +0200 Subject: [PATCH 09/36] ndb - Fix compile error...when compiling debug --- ndb/include/kernel/signaldata/FailRep.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ndb/include/kernel/signaldata/FailRep.hpp b/ndb/include/kernel/signaldata/FailRep.hpp index 
f575d99e865..f2250f1af73 100644 --- a/ndb/include/kernel/signaldata/FailRep.hpp +++ b/ndb/include/kernel/signaldata/FailRep.hpp @@ -18,6 +18,7 @@ #define FAIL_REP_HPP #include "SignalData.hpp" +#include /** * From 2abc5e2f77ca6ef18826b42aa325431aac320674 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 3 Apr 2006 20:43:14 +0200 Subject: [PATCH 10/36] ndb - fix testprogam if only 1 node group --- ndb/test/ndbapi/testNodeRestart.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index d297527ac8b..7017aac0ade 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -694,7 +694,10 @@ runBug18612(NDBT_Context* ctx, NDBT_Step* step){ for (Uint32 i = 0; i Date: Tue, 4 Apr 2006 17:54:58 -0700 Subject: [PATCH 11/36] Bug #13601: Wrong int type for bit The wrong value was being reported as the field_length for BIT fields, resulting in confusion for at least Connector/J. The field_length is now always the number of bits in the field, as it should be. 
--- mysql-test/r/type_bit.result | 9 +++++ mysql-test/r/type_bit_innodb.result | 9 +++++ mysql-test/t/type_bit.test | 12 +++++++ mysql-test/t/type_bit_innodb.test | 12 +++++++ sql/field.cc | 53 +++++++++++++---------------- sql/field.h | 14 ++++---- sql/ha_ndbcluster.cc | 2 +- sql/key.cc | 3 +- 8 files changed, 75 insertions(+), 39 deletions(-) diff --git a/mysql-test/r/type_bit.result b/mysql-test/r/type_bit.result index c6f204e8452..2281ed44e3f 100644 --- a/mysql-test/r/type_bit.result +++ b/mysql-test/r/type_bit.result @@ -564,3 +564,12 @@ b1+0 sum(b1) sum(b2) 1 4 4 2 2 2 drop table t1, t2; +create table t1 (a bit(7)); +insert into t1 values (0x60); +select * from t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 a a 16 7 1 Y 0 0 63 +a +` +drop table t1; +End of 5.0 tests diff --git a/mysql-test/r/type_bit_innodb.result b/mysql-test/r/type_bit_innodb.result index 8d9c9756a33..1f6857277bd 100644 --- a/mysql-test/r/type_bit_innodb.result +++ b/mysql-test/r/type_bit_innodb.result @@ -402,3 +402,12 @@ t1 CREATE TABLE `t1` ( `b` bit(10) default NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 drop table t1; +create table t1 (a bit(7)) engine=innodb; +insert into t1 values (0x60); +select * from t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 a a 16 7 1 Y 0 0 63 +a +` +drop table t1; +End of 5.0 tests diff --git a/mysql-test/t/type_bit.test b/mysql-test/t/type_bit.test index 1f05a9574d6..e028dbc51d9 100644 --- a/mysql-test/t/type_bit.test +++ b/mysql-test/t/type_bit.test @@ -227,3 +227,15 @@ select sum(a1), b1+0, b2+0 from t1 join t2 on b1 = b2 group by b1 order by 1; select 1 from t1 join t2 on b1 = b2 group by b1 order by 1; select b1+0,sum(b1), sum(b2) from t1 join t2 on b1 = b2 group by b1 order by 1; drop table t1, t2; + +# +# Bug #13601: Wrong field length reported for BIT fields +# +create table t1 (a 
bit(7)); +insert into t1 values (0x60); +--enable_metadata +select * from t1; +--disable_metadata +drop table t1; + +--echo End of 5.0 tests diff --git a/mysql-test/t/type_bit_innodb.test b/mysql-test/t/type_bit_innodb.test index ec433f40a88..dbca69d67f0 100644 --- a/mysql-test/t/type_bit_innodb.test +++ b/mysql-test/t/type_bit_innodb.test @@ -133,3 +133,15 @@ show create table t1; alter table t1 engine=innodb; show create table t1; drop table t1; + +# +# Bug #13601: Wrong field length reported for BIT fields +# +create table t1 (a bit(7)) engine=innodb; +insert into t1 values (0x60); +--enable_metadata +select * from t1; +--disable_metadata +drop table t1; + +--echo End of 5.0 tests diff --git a/sql/field.cc b/sql/field.cc index eab62cd1958..51efddb701c 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7918,9 +7918,10 @@ Field_bit::Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, enum utype unireg_check_arg, const char *field_name_arg, struct st_table *table_arg) - : Field(ptr_arg, len_arg >> 3, null_ptr_arg, null_bit_arg, + : Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, table_arg), - bit_ptr(bit_ptr_arg), bit_ofs(bit_ofs_arg), bit_len(len_arg & 7) + bit_ptr(bit_ptr_arg), bit_ofs(bit_ofs_arg), bit_len(len_arg & 7), + bytes_in_rec(len_arg / 8) { /* Ensure that Field::eq() can distinguish between two different bit fields. 
@@ -7956,14 +7957,14 @@ int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs) int delta; for (; length && !*from; from++, length--); // skip left 0's - delta= field_length - length; + delta= bytes_in_rec - length; if (delta < -1 || (delta == -1 && (uchar) *from > ((1 << bit_len) - 1)) || (!bit_len && delta < 0)) { set_rec_bits(0xff, bit_ptr, bit_ofs, bit_len); - memset(ptr, 0xff, field_length); + memset(ptr, 0xff, bytes_in_rec); if (table->in_use->really_abort_on_warning()) set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1); else @@ -7991,7 +7992,7 @@ int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs) set_rec_bits((uchar) *from, bit_ptr, bit_ofs, bit_len); from++; } - memcpy(ptr, from, field_length); + memcpy(ptr, from, bytes_in_rec); } return 0; } @@ -8032,10 +8033,10 @@ longlong Field_bit::val_int(void) if (bit_len) { bits= get_rec_bits(bit_ptr, bit_ofs, bit_len); - bits<<= (field_length * 8); + bits<<= (bytes_in_rec * 8); } - switch (field_length) { + switch (bytes_in_rec) { case 0: return bits; case 1: return bits | (ulonglong) (uchar) ptr[0]; case 2: return bits | mi_uint2korr(ptr); @@ -8044,7 +8045,7 @@ longlong Field_bit::val_int(void) case 5: return bits | mi_uint5korr(ptr); case 6: return bits | mi_uint6korr(ptr); case 7: return bits | mi_uint7korr(ptr); - default: return mi_uint8korr(ptr + field_length - sizeof(longlong)); + default: return mi_uint8korr(ptr + bytes_in_rec - sizeof(longlong)); } } @@ -8097,7 +8098,7 @@ int Field_bit::cmp_offset(uint row_offset) if ((flag= (int) (bits_a - bits_b))) return flag; } - return memcmp(ptr, ptr + row_offset, field_length); + return memcmp(ptr, ptr + row_offset, bytes_in_rec); } @@ -8109,7 +8110,7 @@ void Field_bit::get_key_image(char *buff, uint length, imagetype type) *buff++= bits; length--; } - memcpy(buff, ptr, min(length, field_length)); + memcpy(buff, ptr, min(length, bytes_in_rec)); } @@ -8117,22 +8118,22 @@ void Field_bit::sql_type(String &res) const { 
CHARSET_INFO *cs= res.charset(); ulong length= cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), - "bit(%d)", - (int) field_length * 8 + bit_len); + "bit(%d)", (int) field_length); res.length((uint) length); } char *Field_bit::pack(char *to, const char *from, uint max_length) { - uint length= min(field_length + (bit_len > 0), max_length); + DBUG_ASSERT(max_length); + uint length; if (bit_len) { uchar bits= get_rec_bits(bit_ptr, bit_ofs, bit_len); *to++= bits; - length--; } - memcpy(to, from, length); + length= min(bytes_in_rec, max_length - (bit_len > 0)); + memcpy(to, from, length); return to + length; } @@ -8144,8 +8145,8 @@ const char *Field_bit::unpack(char *to, const char *from) set_rec_bits(*from, bit_ptr, bit_ofs, bit_len); from++; } - memcpy(to, from, field_length); - return from + field_length; + memcpy(to, from, bytes_in_rec); + return from + bytes_in_rec; } @@ -8159,26 +8160,25 @@ Field_bit_as_char::Field_bit_as_char(char *ptr_arg, uint32 len_arg, const char *field_name_arg, struct st_table *table_arg) : Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, 0, - 0, unireg_check_arg, field_name_arg, table_arg), - create_length(len_arg) + 0, unireg_check_arg, field_name_arg, table_arg) { bit_len= 0; - field_length= ((len_arg + 7) & ~7) / 8; + bytes_in_rec= (len_arg + 7) / 8; } int Field_bit_as_char::store(const char *from, uint length, CHARSET_INFO *cs) { int delta; - uchar bits= create_length & 7; + uchar bits= field_length & 7; for (; length && !*from; from++, length--); // skip left 0's - delta= field_length - length; + delta= bytes_in_rec - length; if (delta < 0 || (delta == 0 && bits && (uint) (uchar) *from >= (uint) (1 << bits))) { - memset(ptr, 0xff, field_length); + memset(ptr, 0xff, bytes_in_rec); if (bits) *ptr&= ((1 << bits) - 1); /* set first byte */ if (table->in_use->really_abort_on_warning()) @@ -8197,7 +8197,7 @@ void Field_bit_as_char::sql_type(String &res) const { CHARSET_INFO *cs= res.charset(); ulong length= 
cs->cset->snprintf(cs, (char*) res.ptr(), res.alloced_length(), - "bit(%d)", (int) create_length); + "bit(%d)", (int) field_length); res.length((uint) length); } @@ -8923,11 +8923,6 @@ create_field::create_field(Field *old_field,Field *orig_field) geom_type= ((Field_geom*)old_field)->geom_type; break; #endif - case FIELD_TYPE_BIT: - length= (old_field->key_type() == HA_KEYTYPE_BIT) ? - ((Field_bit *) old_field)->bit_len + length * 8 : - ((Field_bit_as_char *) old_field)->create_length; - break; default: break; } diff --git a/sql/field.h b/sql/field.h index e8dd7f05f99..891e8e392f7 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1303,17 +1303,18 @@ public: uchar *bit_ptr; // position in record where 'uneven' bits store uchar bit_ofs; // offset to 'uneven' high bits uint bit_len; // number of 'uneven' high bits + uint bytes_in_rec; Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, enum utype unireg_check_arg, const char *field_name_arg, struct st_table *table_arg); enum_field_types type() const { return FIELD_TYPE_BIT; } enum ha_base_keytype key_type() const { return HA_KEYTYPE_BIT; } - uint32 key_length() const { return (uint32) field_length + (bit_len > 0); } - uint32 max_length() { return (uint32) field_length * 8 + bit_len; } + uint32 key_length() const { return (uint32) (field_length + 7) / 8; } + uint32 max_length() { return field_length; } uint size_of() const { return sizeof(*this); } Item_result result_type () const { return INT_RESULT; } - void reset(void) { bzero(ptr, field_length); } + void reset(void) { bzero(ptr, bytes_in_rec); } int store(const char *to, uint length, CHARSET_INFO *charset); int store(double nr); int store(longlong nr, bool unsigned_val); @@ -1335,9 +1336,8 @@ public: { Field_bit::store(buff, length, &my_charset_bin); } void sort_string(char *buff, uint length) { get_key_image(buff, length, itRAW); } - uint32 pack_length() const - { return (uint32) field_length + 
(bit_len > 0); } - uint32 pack_length_in_rec() const { return field_length; } + uint32 pack_length() const { return (uint32) (field_length + 7) / 8; } + uint32 pack_length_in_rec() const { return bytes_in_rec; } void sql_type(String &str) const; char *pack(char *to, const char *from, uint max_length=~(uint) 0); const char *unpack(char* to, const char *from); @@ -1354,13 +1354,11 @@ public: class Field_bit_as_char: public Field_bit { public: - uchar create_length; Field_bit_as_char(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, struct st_table *table_arg); enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; } - uint32 max_length() { return (uint32) create_length; } uint size_of() const { return sizeof(*this); } int store(const char *to, uint length, CHARSET_INFO *charset); int store(double nr) { return Field_bit::store(nr); } diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index bc790ae0138..8c19cae34c1 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -3819,7 +3819,7 @@ static int create_ndb_column(NDBCOL &col, break; case MYSQL_TYPE_BIT: { - int no_of_bits= field->field_length*8 + ((Field_bit *) field)->bit_len; + int no_of_bits= field->field_length; col.setType(NDBCOL::Bit); if (!no_of_bits) col.setLength(1); diff --git a/sql/key.cc b/sql/key.cc index 9d86095f33e..75161e4f616 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -192,7 +192,8 @@ void key_restore(byte *to_record, byte *from_key, KEY *key_info, Field_bit *field= (Field_bit *) (key_part->field); if (field->bit_len) { - uchar bits= *(from_key + key_part->length - field->field_length -1); + uchar bits= *(from_key + key_part->length - + field->pack_length_in_rec() - 1); set_rec_bits(bits, to_record + key_part->null_offset + (key_part->null_bit == 128), field->bit_ofs, field->bit_len); From abc9354c4afe290f262a0321909ac39b776bfd9c Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> 
Date: Wed, 5 Apr 2006 11:52:35 +0200 Subject: [PATCH 12/36] merge --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 473c996c9bc..ea256821924 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -834,7 +834,7 @@ retry: CRASH_INSERTION(932); progError(__LINE__, - ERR_ARBIT_SHUTDOWN, + NDBD_EXIT_ARBIT_SHUTDOWN, buf); ndbrequire(false); @@ -1002,7 +1002,7 @@ Qmgr::electionWon(Signal* signal){ c_stopElectionTime = ~0; c_start.reset(); - signal->theData[0] = EventReport::CM_REGCONF; + signal->theData[0] = NDB_LE_CM_REGCONF; signal->theData[1] = getOwnNodeId(); signal->theData[2] = cpresident; signal->theData[3] = 1; @@ -2071,7 +2071,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) { char buf[100]; BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); - progError(__LINE__, ERR_SR_OTHERNODEFAILED, buf); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } } @@ -2361,7 +2361,7 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, break; case FailRep::ZPARTITIONED_CLUSTER: { - code = ERR_ARBIT_SHUTDOWN; + code = NDBD_EXIT_ARBIT_SHUTDOWN; char buf1[100], buf2[100]; c_clusterNodes.getText(buf1); if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && @@ -2386,6 +2386,11 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, msg = extra; break; } + case FailRep::ZMULTI_NODE_SHUTDOWN: + msg = "Multi node shutdown"; + break; + default: + msg = ""; } CRASH_INSERTION(932); From 40d83bdf6ebac0707a42e1898234218c31f824fb Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 6 Apr 2006 11:43:31 +0200 Subject: [PATCH 13/36] ndb - wl1760/bug#18216 add two new start options that will decrease likelyhood of bug#18612 push cntr-sp2 logic down into qmgr-sp1 to decrease likelyhood of bug#18612 
--- .../kernel/signaldata/CmRegSignalData.hpp | 29 +- ndb/include/mgmapi/ndb_logevent.h | 13 +- ndb/src/common/debugger/EventLogger.cpp | 85 +++ ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 1 + ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 78 ++- ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 16 +- ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 28 +- ndb/src/kernel/blocks/qmgr/QmgrInit.cpp | 3 + ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 548 ++++++++++++++++-- ndb/src/kernel/vm/Configuration.cpp | 45 ++ 10 files changed, 735 insertions(+), 111 deletions(-) diff --git a/ndb/include/kernel/signaldata/CmRegSignalData.hpp b/ndb/include/kernel/signaldata/CmRegSignalData.hpp index f33c991249f..ab51ed17bc3 100644 --- a/ndb/include/kernel/signaldata/CmRegSignalData.hpp +++ b/ndb/include/kernel/signaldata/CmRegSignalData.hpp @@ -30,12 +30,17 @@ class CmRegReq { friend class Qmgr; public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size ); private: Uint32 blockRef; Uint32 nodeId; - Uint32 version; // See ndb_version.h + Uint32 version; // See ndb_version.h + + Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType + Uint32 latest_gci; // 0 means no fs + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; /** @@ -59,8 +64,7 @@ private: * The dynamic id that the node reciving this signal has */ Uint32 dynamicId; - - Uint32 allNdbNodes[NdbNodeBitmask::Size]; + Uint32 allNdbNodes[NdbNodeBitmask::Size]; }; /** @@ -73,7 +77,7 @@ class CmRegRef { friend class Qmgr; public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size ); enum ErrorCode { ZBUSY = 0, /* Only the president can send this */ @@ -85,14 +89,27 @@ public: * as president. 
*/ ZNOT_PRESIDENT = 5, /* We are not president */ ZNOT_DEAD = 6, /* We are not dead when we are starting */ - ZINCOMPATIBLE_VERSION = 7 + ZINCOMPATIBLE_VERSION = 7, + ZINCOMPATIBLE_START_TYPE = 8 }; private: Uint32 blockRef; Uint32 nodeId; Uint32 errorCode; + /** + * Applicable if ZELECTION + */ Uint32 presidentCandidate; + Uint32 candidate_latest_gci; // 0 means non + + /** + * Data for sending node sending node + */ + Uint32 latest_gci; + Uint32 start_type; + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; class CmAdd { diff --git a/ndb/include/mgmapi/ndb_logevent.h b/ndb/include/mgmapi/ndb_logevent.h index 6025ff2725c..d57646c14db 100644 --- a/ndb/include/mgmapi/ndb_logevent.h +++ b/ndb/include/mgmapi/ndb_logevent.h @@ -166,10 +166,14 @@ extern "C" { /** NDB_MGM_EVENT_CATEGORY_BACKUP */ NDB_LE_BackupCompleted = 56, /** NDB_MGM_EVENT_CATEGORY_BACKUP */ - NDB_LE_BackupAborted = 57 + NDB_LE_BackupAborted = 57, /* 58 used in 5.1 */ /* 59 used */ + + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_StartReport = 60 + /* 60 unused */ /* 61 unused */ /* 62 unused */ @@ -625,6 +629,13 @@ extern "C" { unsigned type; unsigned node_id; } SingleUser; + /** Log even data @ref NDB_LE_StartReport */ + struct { + unsigned report_type; + unsigned remaining_time; + unsigned bitmask_size; + unsigned bitmask_data[1]; + } StartReport; #ifndef DOXYGEN_FIX }; #else diff --git a/ndb/src/common/debugger/EventLogger.cpp b/ndb/src/common/debugger/EventLogger.cpp index f785cda5215..e1a477b8ea4 100644 --- a/ndb/src/common/debugger/EventLogger.cpp +++ b/ndb/src/common/debugger/EventLogger.cpp @@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) { } } +void getTextStartReport(QQQQ) { + Uint32 time = theData[2]; + Uint32 sz = theData[3]; + char mask1[100]; + char mask2[100]; + char mask3[100]; + char mask4[100]; + BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1); + BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2); + 
BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3); + BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4); + switch(theData[1]){ + case 1: // Wait initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start, waiting for %s to connect, " + " nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 2: // Wait partial + BaseString::snprintf + (m_text, m_text_len, + "Waiting until nodes: %s connects, " + "nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 3: // Wait partial timeout + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for nodes %s to connect, " + "nodes [ all: %s connected: %s no-wait: %s ]", + + time, mask4, mask1, mask2, mask3); + break; + case 4: // Wait partioned + BaseString::snprintf + (m_text, m_text_len, + "Waiting for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + mask1, mask2, mask4, mask3); + break; + case 5: + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + time, mask1, mask2, mask4, mask3); + break; + case 0x8000: // Do initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8001: // Do start + BaseString::snprintf + (m_text, m_text_len, + "Start with all nodes %s", + mask2); + break; + case 0x8002: // Do partial + BaseString::snprintf + (m_text, m_text_len, + "Start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8003: // Do partioned + BaseString::snprintf + (m_text, m_text_len, + "Start potentially partitioned with nodes %s " + " [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + default: + BaseString::snprintf + (m_text, m_text_len, + "Unknown startreport: 0x%x [ %s %s %s %s ]", + theData[1], + mask1, mask2, mask3, mask4); + } +} + 
#if 0 BaseString::snprintf(m_text, m_text_len, @@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ), + ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ), // NODERESTART ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ), diff --git a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index d6f6b772516..cd987048577 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -71,6 +71,7 @@ void Dbdih::initData() cwaitLcpSr = false; c_blockCommit = false; c_blockCommitNo = 1; + cntrlblockref = RNIL; }//Dbdih::initData() void Dbdih::initRecords() diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index e0dbc9bd272..e4b95a5af7d 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Uint32 tmngNode; Uint32 tmngNodeGroup; Uint32 tmngLimit; - Uint32 i; + Uint32 i, j; /**----------------------------------------------------------------------- * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. 
HOT SPARE NODES ARE ASSIGNED @@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); }//if }//for + + for (i = 0; inodeCount; j++) + { + jam(); + mngNodeptr.i = NGPtr.p->nodesInGroup[j]; + ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord); + if (checkNodeAlive(NGPtr.p->nodesInGroup[j])) + { + alive = true; + break; + } + } + + if (!alive) + { + char buf[255]; + BaseString::snprintf + (buf, sizeof(buf), + "Illegal initial start, no alive node in nodegroup %u", i); + progError(__LINE__, + NDBD_EXIT_SR_RESTARTCONFLICT, + buf); + + } + } }//Dbdih::makeNodeGroups() /** @@ -12512,7 +12544,6 @@ void Dbdih::sendStartFragreq(Signal* signal, void Dbdih::setInitialActiveStatus() { NodeRecordPtr siaNodeptr; - Uint32 tsiaNodeActiveStatus; Uint32 tsiaNoActiveNodes; tsiaNoActiveNodes = csystemnodes - cnoHotSpare; @@ -12520,39 +12551,34 @@ void Dbdih::setInitialActiveStatus() SYSFILE->nodeStatus[i] = 0; for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { ptrAss(siaNodeptr, nodeRecord); - if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { + switch(siaNodeptr.p->nodeStatus){ + case NodeRecord::ALIVE: + case NodeRecord::DEAD: if (tsiaNoActiveNodes == 0) { jam(); siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; } else { jam(); tsiaNoActiveNodes = tsiaNoActiveNodes - 1; - siaNodeptr.p->activeStatus = Sysfile::NS_Active; - }//if - } else { - jam(); - siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; - }//if - switch (siaNodeptr.p->activeStatus) { - case Sysfile::NS_Active: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_Active; - break; - case Sysfile::NS_HotSpare: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_HotSpare; - break; - case Sysfile::NS_NotDefined: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_NotDefined; + if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) + { + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_Active; + } + else + { + 
siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; + } + } break; default: - ndbrequire(false); - return; + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; break; - }//switch - Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, - tsiaNodeActiveStatus); + }//if + Sysfile::setNodeStatus(siaNodeptr.i, + SYSFILE->nodeStatus, + siaNodeptr.p->activeStatus); }//for }//Dbdih::setInitialActiveStatus() diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index f5dab99ee35..3fc24e395b1 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -820,17 +820,9 @@ Ndbcntr::trySystemRestart(Signal* signal){ return false; } - if(!allNodes && c_start.m_startPartialTimeout > now){ - jam(); - return false; - } - NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART; - if(c_start.m_waiting.equal(c_start.m_withoutLog)){ - if(!allNodes){ - jam(); - return false; - } + if(c_start.m_waiting.equal(c_start.m_withoutLog)) + { jam(); srType = NodeState::ST_INITIAL_START; c_start.m_starting = c_start.m_withoutLog; // Used for starting... 
@@ -860,10 +852,6 @@ Ndbcntr::trySystemRestart(Signal* signal){ ndbrequire(false); // All nodes -> partitioning, which is not allowed } - if(c_start.m_startPartitionedTimeout > now){ - jam(); - return false; - } break; } diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index e698ddd981b..3feb0858e82 100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -50,6 +50,7 @@ #define ZAPI_HB_HANDLING 3 #define ZTIMER_HANDLING 4 #define ZARBIT_HANDLING 5 +#define ZSTART_FAILURE_LIMIT 6 /* Error Codes ------------------------------*/ #define ZERRTOOMANY 1101 @@ -113,8 +114,19 @@ public: Uint32 m_gsn; SignalCounter m_nodes; - } c_start; + Uint32 m_latest_gci; + Uint32 m_start_type; + NdbNodeBitmask m_skip_nodes; + NdbNodeBitmask m_starting_nodes; + NdbNodeBitmask m_starting_nodes_w_log; + + Uint16 m_president_candidate; + Uint32 m_president_candidate_gci; + Uint16 m_regReqReqSent; + Uint16 m_regReqReqRecv; + } c_start; + NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes @@ -125,7 +137,7 @@ public: * i.e. 
nodes that connect to use, when we already have elected president */ NdbNodeBitmask c_readnodes_nodes; - + Uint32 c_maxDynamicId; // Records @@ -236,6 +248,9 @@ private: void execREAD_NODESREF(Signal* signal); void execREAD_NODESCONF(Signal* signal); + void execDIH_RESTARTREF(Signal* signal); + void execDIH_RESTARTCONF(Signal* signal); + void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -252,6 +267,7 @@ private: // Statement blocks void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + Uint32 check_startup(Signal* signal); void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); @@ -374,12 +390,12 @@ private: /* Status flags ----------------------------------*/ Uint32 c_restartPartialTimeout; + Uint32 c_restartPartionedTimeout; + Uint32 c_restartFailureTimeout; + Uint64 c_start_election_time; Uint16 creadyDistCom; - Uint16 c_regReqReqSent; - Uint16 c_regReqReqRecv; - Uint64 c_stopElectionTime; - Uint16 cpresidentCandidate; + Uint16 cdelayRegreq; Uint16 cpresidentAlive; Uint16 cnoFailedNodes; diff --git a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index d0ecb114d0f..f14cbd48695 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -98,6 +98,9 @@ Qmgr::Qmgr(const class Configuration & conf) addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + + addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); + addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF); initData(); }//Qmgr::Qmgr() diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index ea256821924..a45d35d343b 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -146,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal) runArbitThread(signal); return; break; + 
case ZSTART_FAILURE_LIMIT:{ + if (cpresident != ZNIL) + { + jam(); + return; + } + Uint64 now = NdbTick_CurrentMillisecond(); + if (now > (c_start_election_time + c_restartFailureTimeout)) + { + jam(); + BaseString tmp; + tmp.append("Shutting down node as total restart time exceeds " + " StartFailureTimeout as set in config file "); + if(c_restartFailureTimeout == ~0) + tmp.append(" 0 (inifinite)"); + else + tmp.appfmt(" %d", c_restartFailureTimeout); + + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); + } + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + return; + } default: jam(); // ZCOULD_NOT_OCCUR_ERROR; @@ -273,16 +297,30 @@ void Qmgr::startphase1(Signal* signal) nodePtr.p->phase = ZSTARTING; nodePtr.p->blockRef = reference(); c_connectedNodes.set(nodePtr.i); - - signal->theData[0] = 0; // no answer - signal->theData[1] = 0; // no id - signal->theData[2] = NodeInfo::DB; - sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); - - execCM_INFOCONF(signal); + + signal->theData[0] = reference(); + sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); return; } +void +Qmgr::execDIH_RESTARTREF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = 0; + execCM_INFOCONF(signal); +} + +void +Qmgr::execDIH_RESTARTCONF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = signal->theData[1]; + execCM_INFOCONF(signal); +} + void Qmgr::setHbDelay(UintR aHbDelay) { hb_send_timer.setDelay(aHbDelay < 10 ? 
10 : aHbDelay); @@ -418,25 +456,44 @@ Qmgr::execREAD_NODESREF(Signal* signal) /*******************************/ void Qmgr::execCM_INFOCONF(Signal* signal) { + /** + * Open communcation to all DB nodes + */ + signal->theData[0] = 0; // no answer + signal->theData[1] = 0; // no id + signal->theData[2] = NodeInfo::DB; + sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); + cpresident = ZNIL; - cpresidentCandidate = getOwnNodeId(); cpresidentAlive = ZFALSE; - c_stopElectionTime = NdbTick_CurrentMillisecond(); - c_stopElectionTime += c_restartPartialTimeout; + c_start_election_time = NdbTick_CurrentMillisecond(); + + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + cmInfoconf010Lab(signal); return; }//Qmgr::execCM_INFOCONF() +Uint32 g_start_type = 0; +NdbNodeBitmask g_nowait_nodes; // Set by clo + void Qmgr::cmInfoconf010Lab(Signal* signal) { c_start.m_startKey = 0; c_start.m_startNode = getOwnNodeId(); c_start.m_nodes.clearWaitingFor(); c_start.m_gsn = GSN_CM_REGREQ; + c_start.m_starting_nodes.clear(); + c_start.m_starting_nodes_w_log.clear(); + c_start.m_regReqReqSent = 0; + c_start.m_regReqReqRecv = 0; + c_start.m_skip_nodes = g_nowait_nodes; + c_start.m_skip_nodes.bitAND(c_definedNodes); + c_start.m_start_type = g_start_type; NodeRecPtr nodePtr; - c_regReqReqSent = c_regReqReqRecv = 0; cnoOfNodes = 0; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -471,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal) void Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ - c_regReqReqSent++; - CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0]; - cmRegReq->blockRef = reference(); - cmRegReq->nodeId = getOwnNodeId(); - cmRegReq->version = NDB_VERSION; + CmRegReq * req = (CmRegReq *)&signal->theData[0]; + req->blockRef = reference(); + req->nodeId = getOwnNodeId(); + req->version = NDB_VERSION; + req->latest_gci = c_start.m_latest_gci; + req->start_type = 
c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes); const Uint32 ref = calcQmgrBlockRef(nodeId); sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB); DEBUG_START(GSN_CM_REGREQ, nodeId, ""); + + c_start.m_regReqReqSent++; } /* @@ -518,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ /*******************************/ /* CM_REGREQ */ /*******************************/ +static +int +check_start_type(Uint32 starting, Uint32 own) +{ + if (starting == (1 << NodeState::ST_INITIAL_START) && + ((own & (1 << NodeState::ST_INITIAL_START)) == 0)) + { + return 1; + } + return 0; +} + void Qmgr::execCM_REGREQ(Signal* signal) { DEBUG_START3(signal, ""); @@ -529,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal) const BlockReference Tblockref = cmRegReq->blockRef; const Uint32 startingVersion = cmRegReq->version; addNodePtr.i = cmRegReq->nodeId; + Uint32 gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; + + if (signal->getLength() == CmRegReq::SignalLength) + { + jam(); + gci = cmRegReq->latest_gci; + start_type = cmRegReq->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes); + } if (creadyDistCom == ZFALSE) { jam(); @@ -542,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - - if (cpresident != getOwnNodeId()){ + if (check_start_type(start_type, c_start.m_start_type)) + { + jam(); + sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE); + return; + } + + if (cpresident != getOwnNodeId()) + { jam(); - if (cpresident == ZNIL) { + + if (cpresident == ZNIL) + { /*** * We don't know the president. 
* If the node to be added has lower node id @@ -554,13 +646,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) * candidate */ jam(); - if (addNodePtr.i < cpresidentCandidate) { + if (gci > c_start.m_president_candidate_gci || + (gci == c_start.m_president_candidate_gci && + addNodePtr.i < c_start.m_president_candidate)) + { jam(); - cpresidentCandidate = addNodePtr.i; - }//if + c_start.m_president_candidate = addNodePtr.i; + c_start.m_president_candidate_gci = gci; + ndbout_c("assign candidate: %u %u", addNodePtr.i, gci); + } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; - } + } + /** * We are not the president. * We know the president. @@ -570,7 +668,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (c_start.m_startNode != 0){ + if (c_start.m_startNode != 0) + { jam(); /** * President busy by adding another node @@ -579,7 +678,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (ctoStatus == Q_ACTIVE) { + if (ctoStatus == Q_ACTIVE) + { jam(); /** * Active taking over as president @@ -588,7 +688,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) { + if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) + { jam(); /** * The new node is not in config file @@ -597,13 +698,15 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); Phase phase = addNodePtr.p->phase; - if (phase != ZINIT){ + if (phase != ZINIT) + { jam(); DEBUG("phase = " << phase); sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD); return; - }//if + } jam(); /** @@ -675,7 +778,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef, ref->blockRef = reference(); ref->nodeId = getOwnNodeId(); ref->errorCode = Terror; - ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident); + ref->presidentCandidate = + (cpresident == ZNIL ? 
c_start.m_president_candidate : cpresident); + ref->candidate_latest_gci = c_start.m_president_candidate_gci; + ref->latest_gci = c_start.m_latest_gci; + ref->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes); sendSignal(TBRef, GSN_CM_REGREF, signal, CmRegRef::SignalLength, JBB); DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), ""); @@ -869,28 +977,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ /*******************************/ /* CM_REGREF */ /*******************************/ +static +const char * +get_start_type_string(Uint32 st) +{ + static char buf[256]; + + if (st == 0) + { + return ""; + } + else + { + buf[0] = 0; + for(Uint32 i = 0; itheData[1]; - UintR TrefuseReason = signal->theData[2]; - Uint32 candidate = signal->theData[3]; + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + UintR TaddNodeno = ref->nodeId; + UintR TrefuseReason = ref->errorCode; + Uint32 candidate = ref->presidentCandidate; + Uint32 node_gci = 1; + Uint32 candidate_gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; DEBUG_START3(signal, TrefuseReason); - c_regReqReqRecv++; + if (signal->getLength() == CmRegRef::SignalLength) + { + jam(); + node_gci = ref->latest_gci; + candidate_gci = ref->candidate_latest_gci; + start_type = ref->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes); + } + + c_start.m_regReqReqRecv++; // Ignore block reference in data[0] - if(candidate != cpresidentCandidate){ + if(candidate != c_start.m_president_candidate) + { jam(); - c_regReqReqRecv = ~0; + c_start.m_regReqReqRecv = ~0; } - + + c_start.m_starting_nodes.set(TaddNodeno); + if (node_gci) + { + jam(); + c_start.m_starting_nodes_w_log.set(TaddNodeno); + } + + skip_nodes.bitAND(c_definedNodes); + c_start.m_skip_nodes.bitOR(skip_nodes); + + char buf[100]; switch (TrefuseReason) { case CmRegRef::ZINCOMPATIBLE_VERSION: jam(); - systemErrorLab(signal, __LINE__, "incompatible version, 
connection refused by running ndb node"); + systemErrorLab(signal, __LINE__, + "incompatible version, " + "connection refused by running ndb node"); + case CmRegRef::ZINCOMPATIBLE_START_TYPE: + jam(); + BaseString::snprintf(buf, sizeof(buf), + "incompatible start type detected: node %d" + " reports %s(%d) my start type: %s(%d)", + TaddNodeno, + get_start_type_string(start_type), start_type, + get_start_type_string(c_start.m_start_type), + c_start.m_start_type); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); break; case CmRegRef::ZBUSY: case CmRegRef::ZBUSY_TO_PRES: @@ -909,14 +1094,19 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZELECTION: jam(); - if (cpresidentCandidate > TaddNodeno) { + if (candidate_gci > c_start.m_president_candidate_gci || + (candidate_gci == c_start.m_president_candidate_gci && + candidate < c_start.m_president_candidate)) + { jam(); //---------------------------------------- /* We may already have a candidate */ /* choose the lowest nodeno */ //---------------------------------------- signal->theData[3] = 2; - cpresidentCandidate = TaddNodeno; + c_start.m_president_candidate = candidate; + c_start.m_president_candidate_gci = candidate_gci; + ndbout_c("assign candidate: %u %u", candidate, candidate_gci); } else { signal->theData[3] = 4; }//if @@ -944,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal) //----------------------------------------- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - if(cpresidentAlive == ZTRUE){ + if(cpresidentAlive == ZTRUE) + { jam(); - DEBUG(""); + DEBUG("cpresidentAlive"); return; } - if(c_regReqReqSent != c_regReqReqRecv){ + if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv) + { jam(); - DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv); + DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv); return; } - if(cpresidentCandidate != getOwnNodeId()){ + if(c_start.m_president_candidate != getOwnNodeId()) + { jam(); - DEBUG(""); + DEBUG("i'm not the 
candidate"); return; } - + /** - * All configured nodes has agreed + * All connected nodes has agreed */ - Uint64 now = NdbTick_CurrentMillisecond(); - if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ + if(check_startup(signal)) + { jam(); electionWon(signal); - sendSttorryLab(signal); /** * Start timer handling @@ -981,6 +1173,190 @@ void Qmgr::execCM_REGREF(Signal* signal) return; }//Qmgr::execCM_REGREF() +Uint32 +Qmgr::check_startup(Signal* signal) +{ + Uint64 now = NdbTick_CurrentMillisecond(); + Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout; + Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout; + + /** + * First see if we should wait more... + */ + NdbNodeBitmask tmp; + tmp.bitOR(c_start.m_skip_nodes); + tmp.bitOR(c_start.m_starting_nodes); + + NdbNodeBitmask wait; + wait.assign(c_definedNodes); + wait.bitANDC(tmp); + + Uint32 retVal = 0; + NdbNodeBitmask report_mask; + + if ((c_start.m_latest_gci == 0) || + (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START))) + { + if (!tmp.equal(c_definedNodes)) + { + jam(); + signal->theData[1] = 1; + signal->theData[2] = ~0; + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + else + { + jam(); + signal->theData[1] = 0x8000; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + 
+ if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 
4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + + ndbrequire(false); + +start_report: + jam(); + { + Uint32 sz = NdbNodeBitmask::Size; + signal->theData[0] = NDB_LE_StartReport; + signal->theData[3] = sz; + Uint32* ptr = signal->theData+4; + c_definedNodes.copyto(sz, ptr); ptr += sz; + c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz; + c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz; + report_mask.copyto(sz, ptr); ptr+= sz; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, + 4+4*NdbNodeBitmask::Size, JBB); + } + return retVal; + +missing_nodegroup: + jam(); + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! 
" + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); +} + void Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; @@ -999,7 +1375,7 @@ Qmgr::electionWon(Signal* signal){ c_clusterNodes.set(getOwnNodeId()); cpresidentAlive = ZTRUE; - c_stopElectionTime = ~0; + c_start_election_time = ~0; c_start.reset(); signal->theData[0] = NDB_LE_CM_REGCONF; @@ -1007,6 +1383,13 @@ Qmgr::electionWon(Signal* signal){ signal->theData[2] = cpresident; signal->theData[3] = 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + c_start.m_starting_nodes.clear(getOwnNodeId()); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } } /* @@ -1020,7 +1403,15 @@ Qmgr::electionWon(Signal* signal){ /*--------------------------------------------------------------*/ void Qmgr::regreqTimeLimitLab(Signal* signal) { - if(cpresident == ZNIL){ + if(cpresident == ZNIL) + { + if (c_start.m_president_candidate == ZNIL) + { + jam(); + c_start.m_president_candidate = getOwnNodeId(); + ndbout_c("Assigning candidate to self: %d", getOwnNodeId()); + } + cmInfoconf010Lab(signal); } }//Qmgr::regreqTimelimitLab() @@ -1430,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal) */ handleArbitNdbAdd(signal, addNodePtr.i); c_start.reset(); + + if (c_start.m_starting_nodes.get(addNodePtr.i)) + { + jam(); + c_start.m_starting_nodes.clear(addNodePtr.i); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } + } return; }//switch ndbrequire(false); @@ -1583,7 +1985,8 @@ void Qmgr::initData(Signal* signal) cnoPrepFailedNodes = 0; creadyDistCom = ZFALSE; cpresident = ZNIL; - cpresidentCandidate = ZNIL; + c_start.m_president_candidate = ZNIL; + c_start.m_president_candidate_gci = 0; cpdistref = 0; cneighbourh = ZNIL; cneighbourl = ZNIL; @@ -1611,15 +2014,33 @@ void Qmgr::initData(Signal* signal) Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; + 
c_restartPartionedTimeout = 60000; + c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); - if(c_restartPartialTimeout == 0){ + ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, + &c_restartPartionedTimeout); + ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, + &c_restartFailureTimeout); + + if(c_restartPartialTimeout == 0) + { c_restartPartialTimeout = ~0; } + if (c_restartPartionedTimeout ==0) + { + c_restartPartionedTimeout = ~0; + } + + if (c_restartFailureTimeout == 0) + { + c_restartFailureTimeout = ~0; + } + setHbDelay(hbDBDB); setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); @@ -2051,6 +2472,16 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); + + char buf[100]; + if (getNodeState().startLevel < NodeState::SL_STARTED) + { + jam(); + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); + ndbrequire(false); + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); @@ -2069,7 +2500,6 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); case ZAPI_INACTIVE: { - char buf[100]; BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); @@ -4178,8 +4608,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) case 1: infoEvent("creadyDistCom = %d, cpresident = %d\n", creadyDistCom, cpresident); - infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n", - cpresidentAlive, cpresidentCandidate); + infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", + cpresidentAlive, + c_start.m_president_candidate, + 
c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i arr; + str.split(arr, ","); + for (Uint32 i = 0; i 0 && val < MAX_NDB_NODES)) + { + ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s", + val, _nowait_nodes); + exit(-1); + } + g_nowait_nodes.set(val); + } + } + + if (_initialstart) + { + _initialStart = true; + g_start_type |= (1 << NodeState::ST_INITIAL_START); + } + return true; } From bd7a5740356638cfee2b3502699614d83e4e0f20 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 6 Apr 2006 11:51:24 +0200 Subject: [PATCH 14/36] post merge fixes --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index a45d35d343b..2a78ed7f55a 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2474,9 +2474,11 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); char buf[100]; - if (getNodeState().startLevel < NodeState::SL_STARTED) + if (getNodeInfo(nodeId).getType() == NodeInfo::DB && + getNodeState().startLevel < NodeState::SL_STARTED) { jam(); + CRASH_INSERTION(932); BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); From f41db42287b685fcde3525c07347d71477638acb Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 6 Apr 2006 16:18:42 +0200 Subject: [PATCH 15/36] ndb - bug#18612 - post review fixes 1) make sure that check_multi_node_shutdown does not proceed (in stop case) 2) Fix printout --- ndb/src/kernel/blocks/qmgr/Qmgr.hpp | 2 +- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 07e6a2a10c1..02be002cae0 
100644 --- a/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -409,7 +409,7 @@ private: Uint16 ccommitFailedNodes[MAX_NDB_NODES]; StopReq c_stopReq; - void check_multi_node_shutdown(Signal* signal); + bool check_multi_node_shutdown(Signal* signal); }; #endif diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index c17922dff48..8b7caadfeb9 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2296,7 +2296,11 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); FailRep* rep = (FailRep*)signal->getDataPtr(); - check_multi_node_shutdown(signal); + if (check_multi_node_shutdown(signal)) + { + jam(); + return; + } if (failedNodePtr.i == getOwnNodeId()) { jam(); @@ -2360,8 +2364,8 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, "We(%u) have been declared dead by %u reason: %s(%u)", getOwnNodeId(), refToNode(signal->getSendersBlockRef()), - aFailCause, - msg ? msg : ""); + msg ? 
msg : "", + aFailCause); progError(__LINE__, code, buf); return; @@ -2421,7 +2425,11 @@ void Qmgr::execPREP_FAILREQ(Signal* signal) NodeRecPtr myNodePtr; jamEntry(); - check_multi_node_shutdown(signal); + if (check_multi_node_shutdown(signal)) + { + jam(); + return; + } PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0]; @@ -4255,7 +4263,7 @@ Qmgr::execSTOP_REQ(Signal* signal) } } -void +bool Qmgr::check_multi_node_shutdown(Signal* signal) { if (c_stopReq.senderRef && @@ -4271,5 +4279,7 @@ Qmgr::check_multi_node_shutdown(Signal* signal) } else { EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1); } + return true; } + return false; } From 73a0ae9d9b9b1d9c0497724c33b85601ef04afb3 Mon Sep 17 00:00:00 2001 From: "joerg@mysql.com" <> Date: Thu, 6 Apr 2006 18:42:07 +0200 Subject: [PATCH 16/36] Backport of 5.1 test options "--with-ndbcluster" and "--with-ndbcluster-only" as dummies (ignored). --- mysql-test/mysql-test-run.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mysql-test/mysql-test-run.sh b/mysql-test/mysql-test-run.sh index 5853ffa201c..64eaaab9fa4 100644 --- a/mysql-test/mysql-test-run.sh +++ b/mysql-test/mysql-test-run.sh @@ -230,6 +230,9 @@ FAILED_CASES= EXTRA_MASTER_OPT="" EXTRA_MYSQL_TEST_OPT="" USE_RUNNING_SERVER=1 +# backport from 5.1, disabled - this substitution is not done in 4.0 +# USE_NDBCLUSTER=@USE_NDBCLUSTER@ +# USE_NDBCLUSTER_ONLY=0 DO_GCOV="" DO_GDB="" MANUAL_GDB="" @@ -259,6 +262,14 @@ while test $# -gt 0; do SLAVE_MYSQLD=`$ECHO "$1" | $SED -e "s;--slave-binary=;;"` ;; --local) USE_RUNNING_SERVER="" ;; --extern) USE_RUNNING_SERVER="1" ;; + --with-ndbcluster) +# USE_NDBCLUSTER="--ndbcluster" ;; + $ECHO "Option '--with-ndbcluster' is ignored in this version" ;; + --with-ndbcluster-only) +# USE_NDBCLUSTER="--ndbcluster" +# USE_NDBCLUSTER_SLAVE="--ndbcluster" +# USE_NDBCLUSTER_ONLY=1 ;; + $ECHO "Option '--with-ndbcluster-only' is ignored in this version" ;; --tmpdir=*) MYSQL_TMP_DIR=`$ECHO "$1" | $SED -e 
"s;--tmpdir=;;"` ;; --local-master) MASTER_MYPORT=3306; From 9bd0b97824216989e8d880f9d831cc522ae287a9 Mon Sep 17 00:00:00 2001 From: "igor@rurik.mysql.com" <> Date: Thu, 6 Apr 2006 15:29:15 -0700 Subject: [PATCH 17/36] Fixed bug #15917: unexpected complain for a NIST test case. The problem was due to the fact that with --lower-case-table-names set to 1 the function find_field_in_group did not convert the prefix 'HU' in HU.PROJ.CITY into lower case when looking for it in the group list. Yet the names in the group list were extended by the database name in lower case. --- mysql-test/r/having.result | 35 ++++++++++++++++++++++++++++++ mysql-test/t/having.test | 44 ++++++++++++++++++++++++++++++++++++++ sql/item.cc | 9 ++++++++ 3 files changed, 88 insertions(+) diff --git a/mysql-test/r/having.result b/mysql-test/r/having.result index 225d5a475ff..c827e11e50e 100644 --- a/mysql-test/r/having.result +++ b/mysql-test/r/having.result @@ -359,3 +359,38 @@ group by s1 collate latin1_swedish_ci having s1 = 'y'; s1 count(s1) y 1 drop table t1; +DROP SCHEMA IF EXISTS HU; +Warnings: +Note 1008 Can't drop database 'HU'; database doesn't exist +CREATE SCHEMA HU ; +USE HU ; +CREATE TABLE STAFF +(EMPNUM CHAR(3) NOT NULL UNIQUE, +EMPNAME CHAR(20), +GRADE DECIMAL(4), +CITY CHAR(15)); +CREATE TABLE PROJ +(PNUM CHAR(3) NOT NULL UNIQUE, +PNAME CHAR(20), +PTYPE CHAR(6), +BUDGET DECIMAL(9), +CITY CHAR(15)); +INSERT INTO STAFF VALUES ('E1','Alice',12,'Deale'); +INSERT INTO STAFF VALUES ('E2','Betty',10,'Vienna'); +INSERT INTO STAFF VALUES ('E3','Carmen',13,'Vienna'); +INSERT INTO STAFF VALUES ('E4','Don',12,'Deale'); +INSERT INTO STAFF VALUES ('E5','Ed',13,'Akron'); +INSERT INTO PROJ VALUES ('P1','MXSS','Design',10000,'Deale'); +INSERT INTO PROJ VALUES ('P2','CALM','Code',30000,'Vienna'); +INSERT INTO PROJ VALUES ('P3','SDP','Test',30000,'Tampa'); +INSERT INTO PROJ VALUES ('P4','SDP','Design',20000,'Deale'); +INSERT INTO PROJ VALUES ('P5','IRM','Test',10000,'Vienna'); +INSERT INTO PROJ 
VALUES ('P6','PAYR','Design',50000,'Deale'); +SELECT EMPNUM, GRADE*1000 +FROM HU.STAFF WHERE GRADE * 1000 > +ANY (SELECT SUM(BUDGET) FROM HU.PROJ +GROUP BY CITY, PTYPE +HAVING HU.PROJ.CITY = HU.STAFF.CITY); +EMPNUM GRADE*1000 +E3 13000 +DROP SCHEMA HU; diff --git a/mysql-test/t/having.test b/mysql-test/t/having.test index 78628bef198..9e5bc4bc136 100644 --- a/mysql-test/t/having.test +++ b/mysql-test/t/having.test @@ -347,3 +347,47 @@ group by s1 collate latin1_swedish_ci having s1 = 'y'; # MySQL returns: 1 row, with count(s1) = 1 drop table t1; + + +# +# Bug #15917: unexpected complain for a name in having clause +# when the server is run on Windows or with --lower-case-table-names=1 +# + +DROP SCHEMA IF EXISTS HU; +CREATE SCHEMA HU ; +USE HU ; + +CREATE TABLE STAFF + (EMPNUM CHAR(3) NOT NULL UNIQUE, + EMPNAME CHAR(20), + GRADE DECIMAL(4), + CITY CHAR(15)); + +CREATE TABLE PROJ + (PNUM CHAR(3) NOT NULL UNIQUE, + PNAME CHAR(20), + PTYPE CHAR(6), + BUDGET DECIMAL(9), + CITY CHAR(15)); + +INSERT INTO STAFF VALUES ('E1','Alice',12,'Deale'); +INSERT INTO STAFF VALUES ('E2','Betty',10,'Vienna'); +INSERT INTO STAFF VALUES ('E3','Carmen',13,'Vienna'); +INSERT INTO STAFF VALUES ('E4','Don',12,'Deale'); +INSERT INTO STAFF VALUES ('E5','Ed',13,'Akron'); + +INSERT INTO PROJ VALUES ('P1','MXSS','Design',10000,'Deale'); +INSERT INTO PROJ VALUES ('P2','CALM','Code',30000,'Vienna'); +INSERT INTO PROJ VALUES ('P3','SDP','Test',30000,'Tampa'); +INSERT INTO PROJ VALUES ('P4','SDP','Design',20000,'Deale'); +INSERT INTO PROJ VALUES ('P5','IRM','Test',10000,'Vienna'); +INSERT INTO PROJ VALUES ('P6','PAYR','Design',50000,'Deale'); + +SELECT EMPNUM, GRADE*1000 + FROM HU.STAFF WHERE GRADE * 1000 > + ANY (SELECT SUM(BUDGET) FROM HU.PROJ + GROUP BY CITY, PTYPE + HAVING HU.PROJ.CITY = HU.STAFF.CITY); + +DROP SCHEMA HU; diff --git a/sql/item.cc b/sql/item.cc index e1bde85e200..e3da950ceef 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3054,6 +3054,7 @@ static Item** 
find_field_in_group_list(Item *find_item, ORDER *group_list) int found_match_degree= 0; Item_ident *cur_field; int cur_match_degree= 0; + char name_buff[NAME_LEN+1]; if (find_item->type() == Item::FIELD_ITEM || find_item->type() == Item::REF_ITEM) @@ -3065,6 +3066,14 @@ static Item** find_field_in_group_list(Item *find_item, ORDER *group_list) else return NULL; + if (db_name && lower_case_table_names) + { + /* Convert database to lower case for comparison */ + strmake(name_buff, db_name, sizeof(name_buff)-1); + my_casedn_str(files_charset_info, name_buff); + db_name= name_buff; + } + DBUG_ASSERT(field_name != 0); for (ORDER *cur_group= group_list ; cur_group ; cur_group= cur_group->next) From 0f5939958076c68edcb6094cef4078179e4b8fad Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 7 Apr 2006 10:10:09 +0200 Subject: [PATCH 18/36] ndb - remove debug prinouts in Qmgr --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index a029418777c..c98833da7b1 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -653,7 +653,6 @@ void Qmgr::execCM_REGREQ(Signal* signal) jam(); c_start.m_president_candidate = addNodePtr.i; c_start.m_president_candidate_gci = gci; - ndbout_c("assign candidate: %u %u", addNodePtr.i, gci); } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; @@ -1106,7 +1105,6 @@ void Qmgr::execCM_REGREF(Signal* signal) signal->theData[3] = 2; c_start.m_president_candidate = candidate; c_start.m_president_candidate_gci = candidate_gci; - ndbout_c("assign candidate: %u %u", candidate, candidate_gci); } else { signal->theData[3] = 4; }//if @@ -1409,7 +1407,6 @@ void Qmgr::regreqTimeLimitLab(Signal* signal) { jam(); c_start.m_president_candidate = getOwnNodeId(); - ndbout_c("Assigning candidate to self: %d", getOwnNodeId()); } cmInfoconf010Lab(signal); From 
7fd9d66cdc1103c7e7ba19c6a35d157771060d2f Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Fri, 7 Apr 2006 10:39:47 +0200 Subject: [PATCH 19/36] Support for ndb multi-node shutdown --- ndb/include/kernel/signaldata/StopReq.hpp | 3 +- ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 5 + ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 199 +++++++-------- ndb/src/mgmclient/CommandInterpreter.cpp | 232 +++++++++++++----- ndb/src/mgmsrv/MgmtSrvr.cpp | 204 +++++++++------ ndb/src/mgmsrv/MgmtSrvr.hpp | 14 +- ndb/src/mgmsrv/Services.cpp | 37 +-- 7 files changed, 448 insertions(+), 246 deletions(-) diff --git a/ndb/include/kernel/signaldata/StopReq.hpp b/ndb/include/kernel/signaldata/StopReq.hpp index 8a9fde75b6c..70e195961ce 100644 --- a/ndb/include/kernel/signaldata/StopReq.hpp +++ b/ndb/include/kernel/signaldata/StopReq.hpp @@ -92,7 +92,7 @@ class StopRef friend class Ndbcntr; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); enum ErrorCode { OK = 0, @@ -107,6 +107,7 @@ public: public: Uint32 senderData; Uint32 errorCode; + Uint32 masterNodeId; }; inline diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index 3fc24e395b1..c403aad5516 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -2125,6 +2125,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); @@ -2136,6 +2137,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); ref->errorCode = StopRef::UnsupportedNodeShutdown; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2146,6 +2148,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); 
ref->errorCode = StopRef::MultiNodeShutdownNotMaster; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2289,6 +2292,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->senderData = stopReq.senderData; ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; + ref->masterNodeId = cntr.cmasterNodeId; const BlockReference bref = stopReq.senderRef; if (bref != RNIL) @@ -2437,6 +2441,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){ StopRef * const stopRef = (StopRef *)&signal->theData[0]; stopRef->senderData = c_stopRec.stopReq.senderData; stopRef->errorCode = StopRef::TransactionAbortFailed; + stopRef->masterNodeId = cmasterNodeId; sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); } diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 2a78ed7f55a..e9167e980b5 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -1216,115 +1216,116 @@ Qmgr::check_startup(Signal* signal) goto start_report; } } - const bool all = c_start.m_starting_nodes.equal(c_definedNodes); - CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; - { - /** - * Check for missing node group directly - */ - char buf[100]; - NdbNodeBitmask check; - check.assign(c_definedNodes); - check.bitANDC(c_start.m_starting_nodes); // Not connected nodes - check.bitOR(c_start.m_starting_nodes_w_log); - + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = 
CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + sd->blockRef = reference(); sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = check; + sd->mask = c_start.m_starting_nodes; EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); - - if (sd->output == CheckNodeGroups::Lose) + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) { jam(); - goto missing_nodegroup; - } - } - - sd->blockRef = reference(); - sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = c_start.m_starting_nodes; - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); - - const Uint32 result = sd->output; - - sd->blockRef = reference(); - sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = c_start.m_starting_nodes_w_log; - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); - - const Uint32 result_w_log = sd->output; - - if (tmp.equal(c_definedNodes)) - { - /** - * All nodes (wrt no-wait nodes) has connected... - * this means that we will now start or die - */ - jam(); - switch(result_w_log){ - case CheckNodeGroups::Lose: - { - jam(); - goto missing_nodegroup; - } - case CheckNodeGroups::Win: - signal->theData[1] = all ? 0x8001 : 0x8002; - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; - case CheckNodeGroups::Partitioning: - ndbrequire(result != CheckNodeGroups::Lose); - signal->theData[1] = - all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; - } - } - - if (now < partial_timeout) - { - jam(); - signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; - signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); - report_mask.assign(wait); - retVal = 0; - goto start_report; - } - - /** - * Start partial has passed...check for partitioning... 
- */ - switch(result_w_log){ - case CheckNodeGroups::Lose: - jam(); - goto missing_nodegroup; - case CheckNodeGroups::Partitioning: - if (now < partitioned_timeout && result != CheckNodeGroups::Win) - { - signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; - signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); retVal = 0; goto start_report; } - // Fall through... - case CheckNodeGroups::Win: - signal->theData[1] = - all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 
0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } } - ndbrequire(false); start_report: diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index 74d7f879f9c..39c84fd8055 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -25,6 +25,7 @@ #endif #include +#include class MgmtSrvr; @@ -70,6 +71,9 @@ private: */ void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr); + void executeCommand(Vector &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes); /** * Parse the block specification part of the LOG* commands, * things after LOG*: [BLOCK = {ALL|+}] @@ -104,10 +108,14 @@ private: public: void executeStop(int processId, const char* parameters, bool all); + void executeStop(Vector &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeEnterSingleUser(char* parameters); void executeExitSingleUser(char* parameters); void executeStart(int processId, const char* parameters, bool all); void executeRestart(int processId, const char* parameters, bool all); + void executeRestart(Vector &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeLogLevel(int processId, const char* parameters, bool all); void executeError(int processId, const char* parameters, bool all); void executeLog(int processId, const char* parameters, bool all); @@ -643,9 +651,16 @@ CommandInterpreter::execute_impl(const char *_line) } } while (do_continue); // if there is anything in the line proceed + Vector command_list; + { + BaseString tmp(line); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? 
i++ : (command_list.erase(i),0); + } char* firstToken = strtok(line, " "); char* allAfterFirstToken = strtok(NULL, ""); - + if (strcasecmp(firstToken, "HELP") == 0 || strcasecmp(firstToken, "?") == 0) { executeHelp(allAfterFirstToken); @@ -723,22 +738,45 @@ CommandInterpreter::execute_impl(const char *_line) analyseAfterFirstToken(-1, allAfterFirstToken); } else { /** - * First token should be a digit, node ID + * First tokens should be digits, node ID's */ - int nodeId; - - if (! convert(firstToken, nodeId)) { + int node_ids[MAX_NODES]; + unsigned pos; + for (pos= 0; pos < command_list.size(); pos++) + { + int node_id; + if (convert(command_list[pos].c_str(), node_id)) + { + if (node_id <= 0) { + ndbout << "Invalid node ID: " << command_list[pos].c_str() + << "." << endl; + DBUG_RETURN(true); + } + node_ids[pos]= node_id; + continue; + } + break; + } + int no_of_nodes= pos; + if (no_of_nodes == 0) + { + /* No digit found */ invalid_command(_line); DBUG_RETURN(true); } - - if (nodeId <= 0) { - ndbout << "Invalid node ID: " << firstToken << "." 
<< endl; + if (pos == command_list.size()) + { + /* No command found */ + invalid_command(_line); DBUG_RETURN(true); } - - analyseAfterFirstToken(nodeId, allAfterFirstToken); - + if (no_of_nodes == 1) + { + analyseAfterFirstToken(node_ids[0], allAfterFirstToken); + DBUG_RETURN(true); + } + executeCommand(command_list, pos, node_ids, no_of_nodes); + DBUG_RETURN(true); } DBUG_RETURN(true); } @@ -808,6 +846,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ndbout << endl; } +void +CommandInterpreter::executeCommand(Vector &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + const char *cmd= command_list[command_pos].c_str(); + if (strcasecmp("STOP", cmd) == 0) + { + executeStop(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + if (strcasecmp("RESTART", cmd) == 0) + { + executeRestart(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + ndbout_c("Invalid command: '%s' after multi node id list. " + "Expected STOP or RESTART.", cmd); + return; +} + /** * Get next nodeid larger than the give node_id. node_id will be * set to the next node_id in the list. node_id should be set @@ -1400,24 +1459,60 @@ CommandInterpreter::executeClusterLog(char* parameters) //***************************************************************************** void -CommandInterpreter::executeStop(int processId, const char *, bool all) +CommandInterpreter::executeStop(int processId, const char *parameters, + bool all) { - int result = 0; - if(all) { - result = ndb_mgm_stop(m_mgmsrv, 0, 0); - } else { - result = ndb_mgm_stop(m_mgmsrv, 1, &processId); + Vector command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); } - if (result < 0) { - ndbout << "Shutdown failed." 
<< endl; - printError(); - } else + if (all) + executeStop(command_list, 0, 0, 0); + else + executeStop(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeStop(Vector &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + int abort= 0; + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-A") == 0) { - if(all) - ndbout << "NDB Cluster has shutdown." << endl; - else - ndbout << "Node " << processId << " has shutdown." << endl; + abort= 1; + continue; } + ndbout_c("Invalid option: %s. Expecting -A after STOP", + item); + return; + } + + int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort); + if (result < 0) + { + ndbout_c("Shutdown failed."); + printError(); + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster has shutdown."); + else + { + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" has shutdown."); + } + } } void @@ -1483,47 +1578,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters, void CommandInterpreter::executeRestart(int processId, const char* parameters, - bool all) + bool all) +{ + Vector command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? 
i++ : (command_list.erase(i),0); + } + if (all) + executeRestart(command_list, 0, 0, 0); + else + executeRestart(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeRestart(Vector &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) { int result; - int nostart = 0; - int initialstart = 0; - int abort = 0; + int nostart= 0; + int initialstart= 0; + int abort= 0; - if(parameters != 0 && strlen(parameters) != 0){ - char * tmpString = my_strdup(parameters,MYF(MY_WME)); - My_auto_ptr ap1(tmpString); - char * tmpPtr = 0; - char * item = strtok_r(tmpString, " ", &tmpPtr); - while(item != NULL){ - if(strcasecmp(item, "-N") == 0) - nostart = 1; - if(strcasecmp(item, "-I") == 0) - initialstart = 1; - if(strcasecmp(item, "-A") == 0) - abort = 1; - item = strtok_r(NULL, " ", &tmpPtr); + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-N") == 0) + { + nostart= 1; + continue; } + if (strcasecmp(item, "-I") == 0) + { + initialstart= 1; + continue; + } + if (strcasecmp(item, "-A") == 0) + { + abort= 1; + continue; + } + ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART", + item); + return; } - if(all) { - result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort); - } else { - int v[1]; - v[0] = processId; - result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort); - } + result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids, + initialstart, nostart, abort); if (result <= 0) { - ndbout.println("Restart failed.", result); + ndbout_c("Restart failed."); printError(); - } else + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster is being restarted."); + else { - if(all) - ndbout << "NDB Cluster is being restarted." 
<< endl; - else - ndbout_c("Node %d is being restarted.", processId); + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" is being restarted"); } + } } void diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index 76a045bc806..4c5ba033b66 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -294,6 +294,8 @@ static ErrorItem errorTable[] = {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" }, {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, "Node shutdown would cause system crash" }, + {MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN, + "Unsupported multi node shutdown. Abort option required." }, {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." }, {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP, "Operation not allowed while nodes are starting or stopping."}, @@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) case StopRef::NodeShutdownWouldCauseSystemCrash: return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH; break; + case StopRef::UnsupportedNodeShutdown: + return UNSUPPORTED_NODE_SHUTDOWN; + break; } return 4999; } @@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server, _ownReference(0), theSignalIdleList(NULL), theWaitState(WAIT_SUBSCRIBE_CONF), + m_local_mgm_handle(0), m_event_listner(this), - m_local_mgm_handle(0) + m_master_node(0) { DBUG_ENTER("MgmtSrvr::MgmtSrvr"); @@ -677,23 +683,16 @@ MgmtSrvr::~MgmtSrvr() int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) { - if(nodeId == 0) - return 0; - - if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) + if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) return WRONG_PROCESS_TYPE; - // Check if we have contact with it if(unCond){ if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) return 0; - return NO_CONTACT_WITH_PROCESS; } - if (theFacade->get_node_alive(nodeId) == 0) { - return NO_CONTACT_WITH_PROCESS; - } else { + else if 
(theFacade->get_node_alive(nodeId) == true) return 0; - } + return NO_CONTACT_WITH_PROCESS; } void report_unknown_signal(SimpleSignal *signal) @@ -935,7 +934,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId, * distributed communication up. */ -int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, +int MgmtSrvr::sendSTOP_REQ(const Vector &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -945,6 +944,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, bool initialStart) { int error = 0; + DBUG_ENTER("MgmtSrvr::sendSTOP_REQ"); + DBUG_PRINT("enter", ("no of nodes: %d singleUseNodeId: %d " + "abort: %d stop: %d restart: %d " + "nostart: %d initialStart: %d", + node_ids.size(), singleUserNodeId, + abort, stop, restart, nostart, initialStart)); stoppedNodes.clear(); @@ -982,36 +987,46 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, // send the signals NodeBitmask nodes; - if (nodeId) + NodeId nodeId; + int use_master_node= 0; + int do_send= 0; + NdbNodeBitmask nodes_to_stop; { - if(nodeId==getOwnNodeId()) + for (unsigned i= 0; i < node_ids.size(); i++) + nodes_to_stop.set(node_ids[i]); + } + if (node_ids.size()) + { + do_send= 1; + if (node_ids.size() == 1) { - if(restart) - g_RestartServer= true; - g_StopServer= true; - return 0; + nodeId= node_ids[0]; + if (nodeId == getOwnNodeId()) + { + if (restart) + g_RestartServer= true; + g_StopServer= true; + DBUG_RETURN(0); + } + else if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) + { + error= sendStopMgmd(nodeId, abort, stop, restart, + nostart, initialStart); + if (error == 0) + stoppedNodes.set(nodeId); + DBUG_RETURN(error); + } } - if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB) + else // multi node stop, send to master { - int r; - if((r= okToSendTo(nodeId, true)) != 0) - return r; - if (ss.sendSignal(nodeId, &ssig) != SEND_OK) - return SEND_OR_RECEIVE_FAILED; + use_master_node= 1; + nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes); + StopReq::setStopNodes(stopReq->requestInfo, 1); } - else 
if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) - { - error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart); - if(error==0) - stoppedNodes.set(nodeId); - return error; - } - else - return WRONG_PROCESS_TYPE; - nodes.set(nodeId); } else { + nodeId= 0; while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) { if(okToSendTo(nodeId, true) == 0) @@ -1032,8 +1047,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, } // now wait for the replies - while (!nodes.isclear()) + while (!nodes.isclear() || do_send) { + if (do_send) + { + int r; + assert(nodes.count() == 0); + if (use_master_node) + nodeId= m_master_node; + if ((r= okToSendTo(nodeId, true)) != 0) + { + bool next; + if (!use_master_node) + DBUG_RETURN(r); + m_master_node= nodeId= 0; + while((next= getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + (r= okToSendTo(nodeId, true)) != 0); + if (!next) + DBUG_RETURN(NO_CONTACT_WITH_DB_NODES); + } + if (ss.sendSignal(nodeId, &ssig) != SEND_OK) + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); + nodes.set(nodeId); + do_send= 0; + } SimpleSignal *signal = ss.waitFor(); int gsn = signal->readSignalNumber(); switch (gsn) { @@ -1045,6 +1082,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #endif assert(nodes.get(nodeId)); nodes.clear(nodeId); + if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster) + { + assert(use_master_node); + m_master_node= ref->masterNodeId; + do_send= 1; + continue; + } error = translateStopRef(ref->errorCode); break; } @@ -1055,9 +1099,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ndbout_c("Node %d single user mode", nodeId); #endif assert(nodes.get(nodeId)); - assert(singleUserNodeId != 0); + if (singleUserNodeId != 0) + { + stoppedNodes.set(nodeId); + } + else + { + assert(node_ids.size() > 1); + stoppedNodes.bitOR(nodes_to_stop); + } nodes.clear(nodeId); - stoppedNodes.set(nodeId); break; } case GSN_NF_COMPLETEREP:{ @@ -1096,17 +1147,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #ifdef VM_TRACE ndbout_c("Unknown signal %d", gsn); 
#endif - return SEND_OR_RECEIVE_FAILED; + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); } } - return error; + DBUG_RETURN(error); } /* - * Stop one node + * Stop one nodes */ -int MgmtSrvr::stopNode(int nodeId, bool abort) +int MgmtSrvr::stopNodes(const Vector &node_ids, + int *stopCount, bool abort) { if (!abort) { @@ -1121,14 +1173,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) } } NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - false, - false, - false); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + false, + false, + false); + if (stopCount) + *stopCount= nodes.count(); + return ret; } /* @@ -1138,7 +1193,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) int MgmtSrvr::stop(int * stopCount, bool abort) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, @@ -1169,7 +1225,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) return OPERATION_NOT_ALLOWED_START_STOP; } NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, singleUserNodeId, false, @@ -1186,18 +1243,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) * Perform node restart */ -int MgmtSrvr::restartNode(int nodeId, bool nostart, bool initialStart, - bool abort) +int MgmtSrvr::restartNodes(const Vector &node_ids, + int * stopCount, bool nostart, + bool initialStart, bool abort) { NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - true, - nostart, - initialStart); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + true, + nostart, + initialStart); + if (stopCount) + *stopCount = nodes.count(); + return ret; } /* @@ -1208,7 +1269,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart, bool abort, int * stopCount ) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, 
@@ -2135,12 +2197,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) SignalSender ss(theFacade); ss.lock(); // lock will be released on exit - bool next; - NodeId nodeId = 0; - while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && - theFacade->get_node_alive(nodeId) == false); - - if(!next) return NO_CONTACT_WITH_DB_NODES; + NodeId nodeId = m_master_node; + if (okToSendTo(nodeId, false) != 0) + { + bool next; + nodeId = m_master_node = 0; + while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + okToSendTo(nodeId, false) != 0); + if(!next) + return NO_CONTACT_WITH_DB_NODES; + } SimpleSignal ssig; BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend()); @@ -2208,7 +2274,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) const BackupRef * const ref = CAST_CONSTPTR(BackupRef, signal->getDataPtr()); if(ref->errorCode == BackupRef::IAmNotMaster){ - nodeId = refToNode(ref->masterRef); + m_master_node = nodeId = refToNode(ref->masterRef); #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", nodeId); #endif diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 46bdb112cb9..fe1603a1953 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -176,6 +176,7 @@ public: STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); + STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 ); STATIC_CONST( NODE_NOT_API_NODE = 5062 ); STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); @@ -252,7 +253,7 @@ public: * @param processId: Id of the DB process to stop * @return 0 if succeeded, otherwise: as stated above, plus: */ - int stopNode(int nodeId, bool abort = false); + int stopNodes(const Vector &node_ids, int *stopCount, bool abort); /** * Stop the system @@ -286,11 +287,12 @@ public: int start(int processId); /** - * Restart a node + * Restart nodes * @param processId: Id of the DB process to start */ - int 
restartNode(int processId, bool nostart, bool initialStart, - bool abort = false); + int restartNodes(const Vector &node_ids, + int *stopCount, bool nostart, + bool initialStart, bool abort); /** * Restart the system @@ -494,7 +496,7 @@ private: bool nostart, bool initialStart); - int sendSTOP_REQ(NodeId nodeId, + int sendSTOP_REQ(const Vector &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -653,6 +655,8 @@ private: friend class Ndb_mgmd_event_service; Ndb_mgmd_event_service m_event_listner; + NodeId m_master_node; + /** * Handles the thread wich upon a 'Node is started' event will * set the node's previous loglevel settings. diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 3564c5c40ba..64eca92df3d 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -866,14 +866,11 @@ MgmApiSession::restart(Parser::Context &, } int restarted = 0; - int result = 0; - - for(size_t i = 0; i < nodes.size(); i++) - if((result = m_mgmsrv.restartNode(nodes[i], - nostart != 0, - initialstart != 0, - abort != 0)) == 0) - restarted++; + int result= m_mgmsrv.restartNodes(nodes, + &restarted, + nostart != 0, + initialstart != 0, + abort != 0); m_output->println("restart reply"); if(result != 0){ @@ -998,7 +995,12 @@ MgmApiSession::stop(Parser::Context &, args.get("node", (const char **)&nodes_str); if(nodes_str == NULL) + { + m_output->println("stop reply"); + m_output->println("result: empty node list"); + m_output->println(""); return; + } args.get("abort", &abort); char *p, *last; @@ -1010,7 +1012,6 @@ MgmApiSession::stop(Parser::Context &, int stop_self= 0; size_t i; - for(i=0; i < nodes.size(); i++) { if (nodes[i] == m_mgmsrv.getOwnNodeId()) { stop_self= 1; @@ -1020,23 +1021,25 @@ MgmApiSession::stop(Parser::Context &, m_output->println(""); return; } + nodes.erase(i); + break; } } - int stopped = 0, result = 0; - - for(i=0; i < nodes.size(); i++) - if (nodes[i] != m_mgmsrv.getOwnNodeId()) { - if((result = 
m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0) - stopped++; - } else - stopped++; + int stopped= 0; + int result= 0; + if (nodes.size()) + result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0); m_output->println("stop reply"); if(result != 0) m_output->println("result: %s", get_error_text(result)); else + { m_output->println("result: Ok"); + if (stop_self) + stopped++; + } m_output->println("stopped: %d", stopped); m_output->println(""); From 7ed2c6207af421ef24973e9219a43e737308aa62 Mon Sep 17 00:00:00 2001 From: "hartmut@mysql.com" <> Date: Fri, 7 Apr 2006 11:15:15 +0200 Subject: [PATCH 20/36] fix for wrong QUARTER calculation in EXTRACT() (Bug #18100) --- mysql-test/r/func_time.result | 36 +++++++++++++++++++++++++++++++++++ mysql-test/t/func_time.test | 18 ++++++++++++++++++ sql/item_timefunc.cc | 2 +- 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/func_time.result b/mysql-test/r/func_time.result index 64dafa132b4..5d40a0ed01f 100644 --- a/mysql-test/r/func_time.result +++ b/mysql-test/r/func_time.result @@ -360,6 +360,42 @@ extract(SECOND FROM "1999-01-02 10:11:12") select extract(MONTH FROM "2001-02-00"); extract(MONTH FROM "2001-02-00") 2 +SELECT EXTRACT(QUARTER FROM '2004-01-15') AS quarter; +quarter +1 +SELECT EXTRACT(QUARTER FROM '2004-02-15') AS quarter; +quarter +1 +SELECT EXTRACT(QUARTER FROM '2004-03-15') AS quarter; +quarter +1 +SELECT EXTRACT(QUARTER FROM '2004-04-15') AS quarter; +quarter +2 +SELECT EXTRACT(QUARTER FROM '2004-05-15') AS quarter; +quarter +2 +SELECT EXTRACT(QUARTER FROM '2004-06-15') AS quarter; +quarter +2 +SELECT EXTRACT(QUARTER FROM '2004-07-15') AS quarter; +quarter +3 +SELECT EXTRACT(QUARTER FROM '2004-08-15') AS quarter; +quarter +3 +SELECT EXTRACT(QUARTER FROM '2004-09-15') AS quarter; +quarter +3 +SELECT EXTRACT(QUARTER FROM '2004-10-15') AS quarter; +quarter +4 +SELECT EXTRACT(QUARTER FROM '2004-11-15') AS quarter; +quarter +4 +SELECT EXTRACT(QUARTER FROM '2004-12-15') AS quarter; +quarter +4 
SELECT "1900-01-01 00:00:00" + INTERVAL 2147483648 SECOND; "1900-01-01 00:00:00" + INTERVAL 2147483648 SECOND 1968-01-20 03:14:08 diff --git a/mysql-test/t/func_time.test b/mysql-test/t/func_time.test index 3a2eea59bed..c98c1c94609 100644 --- a/mysql-test/t/func_time.test +++ b/mysql-test/t/func_time.test @@ -139,6 +139,24 @@ select extract(MINUTE_SECOND FROM "10:11:12"); select extract(SECOND FROM "1999-01-02 10:11:12"); select extract(MONTH FROM "2001-02-00"); +# +# test EXTRACT QUARTER (Bug #18100) +# + +SELECT EXTRACT(QUARTER FROM '2004-01-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-02-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-03-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-04-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-05-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-06-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-07-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-08-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-09-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-10-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-11-15') AS quarter; +SELECT EXTRACT(QUARTER FROM '2004-12-15') AS quarter; + + # # Test big intervals (Bug #3498) # diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 63a7f1f130b..e139eba385e 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -2213,7 +2213,7 @@ longlong Item_extract::val_int() switch (int_type) { case INTERVAL_YEAR: return ltime.year; case INTERVAL_YEAR_MONTH: return ltime.year*100L+ltime.month; - case INTERVAL_QUARTER: return ltime.month/3 + 1; + case INTERVAL_QUARTER: return (ltime.month+2)/3; case INTERVAL_MONTH: return ltime.month; case INTERVAL_WEEK: { From d202d89c0abfba175417b740b672ec727052ba40 Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Fri, 7 Apr 2006 12:01:07 +0200 Subject: [PATCH 21/36] multi node shutdown adaptations for restart of management servers --- ndb/src/mgmsrv/MgmtSrvr.cpp | 53 
++++++++++++++++++++++--------------- ndb/src/mgmsrv/Services.cpp | 23 ---------------- 2 files changed, 31 insertions(+), 45 deletions(-) diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index 4c5ba033b66..9b518ba938b 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -987,41 +987,44 @@ int MgmtSrvr::sendSTOP_REQ(const Vector &node_ids, // send the signals NodeBitmask nodes; - NodeId nodeId; + NodeId nodeId= 0; int use_master_node= 0; int do_send= 0; + int do_stop_self= 0; NdbNodeBitmask nodes_to_stop; { for (unsigned i= 0; i < node_ids.size(); i++) - nodes_to_stop.set(node_ids[i]); - } - if (node_ids.size()) - { - do_send= 1; - if (node_ids.size() == 1) { - nodeId= node_ids[0]; - if (nodeId == getOwnNodeId()) - { - if (restart) - g_RestartServer= true; - g_StopServer= true; - DBUG_RETURN(0); - } - else if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) + nodeId= node_ids[i]; + if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM) + nodes_to_stop.set(nodeId); + else if (nodeId != getOwnNodeId()) { error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart); if (error == 0) stoppedNodes.set(nodeId); - DBUG_RETURN(error); } + else + do_stop_self= 1;; } - else // multi node stop, send to master + } + int no_of_nodes_to_stop= nodes_to_stop.count(); + if (node_ids.size()) + { + if (no_of_nodes_to_stop) { - use_master_node= 1; - nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes); - StopReq::setStopNodes(stopReq->requestInfo, 1); + do_send= 1; + if (no_of_nodes_to_stop == 1) + { + nodeId= nodes_to_stop.find(0); + } + else // multi node stop, send to master + { + use_master_node= 1; + nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes); + StopReq::setStopNodes(stopReq->requestInfo, 1); + } } } else @@ -1105,7 +1108,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector &node_ids, } else { - assert(node_ids.size() > 1); + assert(no_of_nodes_to_stop > 1); stoppedNodes.bitOR(nodes_to_stop); } nodes.clear(nodeId); 
@@ -1150,6 +1153,12 @@ int MgmtSrvr::sendSTOP_REQ(const Vector &node_ids, DBUG_RETURN(SEND_OR_RECEIVE_FAILED); } } + if (!error && do_stop_self) + { + if (restart) + g_RestartServer= true; + g_StopServer= true; + } DBUG_RETURN(error); } diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 64eca92df3d..a80827abd8f 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -1010,22 +1010,6 @@ MgmApiSession::stop(Parser::Context &, nodes.push_back(atoi(p)); } - int stop_self= 0; - size_t i; - for(i=0; i < nodes.size(); i++) { - if (nodes[i] == m_mgmsrv.getOwnNodeId()) { - stop_self= 1; - if (i != nodes.size()-1) { - m_output->println("stop reply"); - m_output->println("result: server must be stopped last"); - m_output->println(""); - return; - } - nodes.erase(i); - break; - } - } - int stopped= 0; int result= 0; if (nodes.size()) @@ -1035,16 +1019,9 @@ MgmApiSession::stop(Parser::Context &, if(result != 0) m_output->println("result: %s", get_error_text(result)); else - { m_output->println("result: Ok"); - if (stop_self) - stopped++; - } m_output->println("stopped: %d", stopped); m_output->println(""); - - if (stop_self) - g_StopServer= true; } From 7e4d41de58888b5e7e69bbc00e17288b582922e3 Mon Sep 17 00:00:00 2001 From: "joerg@mysql.com" <> Date: Fri, 7 Apr 2006 13:02:15 +0200 Subject: [PATCH 22/36] Perl test script: Avoid some aborts, which made the whole build/test process terminate. --- mysql-test/lib/mtr_process.pl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl index 4da608ad345..662b70a4fee 100644 --- a/mysql-test/lib/mtr_process.pl +++ b/mysql-test/lib/mtr_process.pl @@ -454,8 +454,7 @@ sub mtr_kill_leftovers () { if ( kill(0, @pids) ) # Check if some left { - # FIXME maybe just mtr_warning() ? - mtr_error("can't kill process(es) " . join(" ", @pids)); + mtr_warning("can't kill process(es) " . 
join(" ", @pids)); } } } @@ -468,7 +467,7 @@ sub mtr_kill_leftovers () { { if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) { - mtr_error("can't kill old mysqld holding port $srv->{'port'}"); + mtr_warning("can't kill old mysqld holding port $srv->{'port'}"); } } } From 65f4595e9a17d830931f4be0d257c97376cea0f8 Mon Sep 17 00:00:00 2001 From: "joerg@mysql.com" <> Date: Fri, 7 Apr 2006 13:34:39 +0200 Subject: [PATCH 23/36] mysql-test/mysql-test-run.pl : Add option "with-ndbcluster-only" (ignored). --- mysql-test/mysql-test-run.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index d4f759604dd..53bf820cce9 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -277,6 +277,8 @@ our $opt_udiff; our $opt_skip_ndbcluster; our $opt_with_ndbcluster; +our $opt_with_ndbcluster_only= 0; # dummy, ignored + our $opt_with_openssl; our $exe_ndb_mgm; @@ -512,6 +514,7 @@ sub command_line_setup () { 'force' => \$opt_force, 'with-ndbcluster' => \$opt_with_ndbcluster, 'skip-ndbcluster|skip-ndb' => \$opt_skip_ndbcluster, + 'with-ndbcluster-only' => \$opt_with_ndbcluster_only, 'do-test=s' => \$opt_do_test, 'suite=s' => \$opt_suite, 'skip-rpl' => \$opt_skip_rpl, @@ -596,6 +599,11 @@ sub command_line_setup () { print '#' x 78, "\n\n"; } + if ( $opt_with_ndbcluster_only ) + { + print "# Option '--with-ndbcluster-only' is ignored in this release.\n"; + } + foreach my $arg ( @ARGV ) { if ( $arg =~ /^--skip-/ ) From 7cd90d0081e8acdc9fd36f5536d06289e35cb19e Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 7 Apr 2006 15:48:13 +0200 Subject: [PATCH 24/36] ndb - bug#11033 silent valgrind warnings 5.0, ndb_autodiscover --- ndb/src/ndbapi/NdbDictionaryImpl.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ndb/src/ndbapi/NdbDictionaryImpl.cpp b/ndb/src/ndbapi/NdbDictionaryImpl.cpp index da344dc6164..f7a28eb989c 100644 --- 
a/ndb/src/ndbapi/NdbDictionaryImpl.cpp +++ b/ndb/src/ndbapi/NdbDictionaryImpl.cpp @@ -1109,9 +1109,14 @@ NdbDictInterface::getTable(const BaseString& name, bool fullyQualifiedNames) // Copy name to m_buffer to get a word sized buffer m_buffer.clear(); - m_buffer.grow(namelen_words*4); + m_buffer.grow(namelen_words*4+4); m_buffer.append(name.c_str(), namelen); +#ifndef IGNORE_VALGRIND_WARNINGS + Uint32 pad = 0; + m_buffer.append(&pad, 4); +#endif + LinearSectionPtr ptr[1]; ptr[0].p= (Uint32*)m_buffer.get_data(); ptr[0].sz= namelen_words; From 739b0d676b7a7ff939aa4e667d0a146a5f0d6b2b Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Fri, 7 Apr 2006 17:10:56 +0200 Subject: [PATCH 25/36] ndb - bug#11034 fix mem leak (not during running of transactions, but a few bytes when shutting down) --- ndb/src/ndbapi/NdbLinHash.hpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ndb/src/ndbapi/NdbLinHash.hpp b/ndb/src/ndbapi/NdbLinHash.hpp index 05670534c95..0655e81ce9d 100644 --- a/ndb/src/ndbapi/NdbLinHash.hpp +++ b/ndb/src/ndbapi/NdbLinHash.hpp @@ -427,19 +427,26 @@ NdbLinHash::getNext(NdbElement_t * curr){ return curr->next; int dir = 0, seg = 0; - - if(curr != 0){ + int counts; + if(curr != 0) + { getBucket(curr->hash, &dir, &seg); + counts = seg + 1; + } + else + { + counts = 0; } for(int countd = dir; countd < DIRECTORYSIZE;countd++ ){ if (directory[countd] != 0) { - for(int counts = seg + 1; counts < SEGMENTSIZE; counts++ ){ + for(; counts < SEGMENTSIZE; counts++ ){ if (directory[countd]->elements[counts] != 0) { return directory[countd]->elements[counts]; } } } + counts = 0; } return 0; From a7edbc647ee61342595ce5a4d5b4fb43c8b1db2d Mon Sep 17 00:00:00 2001 From: "joerg@mysql.com" <> Date: Fri, 7 Apr 2006 19:27:19 +0200 Subject: [PATCH 26/36] mysql-test/mysql-test-run.sh : Provide info about the options used. 
--- mysql-test/mysql-test-run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/mysql-test-run.sh b/mysql-test/mysql-test-run.sh index 64eaaab9fa4..f1f146e18a9 100644 --- a/mysql-test/mysql-test-run.sh +++ b/mysql-test/mysql-test-run.sh @@ -250,6 +250,8 @@ START_WAIT_TIMEOUT=10 STOP_WAIT_TIMEOUT=10 MYSQL_TEST_SSL_OPTS="" +$ECHO "Logging: $0 $*" # To ensure we see all arguments in the output, for the test analysis tool + while test $# -gt 0; do case "$1" in --user=*) DBUSER=`$ECHO "$1" | $SED -e "s;--user=;;"` ;; From d881fb36b90a0b791dc03f59aaa4f1476e37029d Mon Sep 17 00:00:00 2001 From: "aelkin@mysql.com" <> Date: Fri, 7 Apr 2006 20:44:37 +0300 Subject: [PATCH 27/36] Bug#17284 erroneous temp table cleanup on slave. Idea of the fix is for master to send FD event with `created' as 0 to reconnecting slave (upon slave_net_timeout, no master crash) to avoid destroying temp tables. In a case of a connect by slave to the master after its crash temp tables have been already cleaned up so that slave can not keep `orphan' temp tables. --- .../include/get_binlog_dump_thread_id.inc | 9 +++++++ mysql-test/r/rpl_temporary.result | 14 ++++++++++ mysql-test/t/rpl_temporary.test | 27 ++++++++++++++++++- sql/sql_repl.cc | 6 +++++ 4 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 mysql-test/include/get_binlog_dump_thread_id.inc diff --git a/mysql-test/include/get_binlog_dump_thread_id.inc b/mysql-test/include/get_binlog_dump_thread_id.inc new file mode 100644 index 00000000000..9efa12c5611 --- /dev/null +++ b/mysql-test/include/get_binlog_dump_thread_id.inc @@ -0,0 +1,9 @@ +--exec $MYSQL test -e 'show processlist' | grep 'Binlog Dump' | cut -f1 > $MYSQLTEST_VARDIR/tmp/bl_dump_thread_id +--disable_warnings +drop table if exists t999; +--enable_warnings +create temporary table t999 (f int); +--replace_result $MYSQL_TEST_DIR "." 
+eval LOAD DATA INFILE "$MYSQLTEST_VARDIR/tmp/bl_dump_thread_id" into table t999; +let $id = `select f from t999`; +drop table t999; diff --git a/mysql-test/r/rpl_temporary.result b/mysql-test/r/rpl_temporary.result index a76fb87a52b..32cd1f16b99 100644 --- a/mysql-test/r/rpl_temporary.result +++ b/mysql-test/r/rpl_temporary.result @@ -89,3 +89,17 @@ f 7 drop table t1,t2; create temporary table t3 (f int); +create temporary table t4 (f int); +create table t5 (f int); +drop table if exists t999; +create temporary table t999 (f int); +LOAD DATA INFILE "./var/tmp/bl_dump_thread_id" into table t999; +drop table t999; +insert into t4 values (1); +kill `select id from information_schema.processlist where command='Binlog Dump'`; +insert into t5 select * from t4; +select * from t5 /* must be 1 after reconnection */; +f +1 +drop temporary table t4; +drop table t5; diff --git a/mysql-test/t/rpl_temporary.test b/mysql-test/t/rpl_temporary.test index fcb2391a9d8..2400eac76ba 100644 --- a/mysql-test/t/rpl_temporary.test +++ b/mysql-test/t/rpl_temporary.test @@ -129,6 +129,31 @@ drop table t1,t2; create temporary table t3 (f int); sync_with_master; +# +# Bug#17284 erroneous temp table cleanup on slave +# + +connection master; +create temporary table t4 (f int); +create table t5 (f int); +sync_with_master; +# find dumper's $id +source include/get_binlog_dump_thread_id.inc; +insert into t4 values (1); +# a hint how to do that in 5.1 +--replace_result $id "`select id from information_schema.processlist where command='Binlog Dump'`" +eval kill $id; # to stimulate reconnection by slave w/o timeout +insert into t5 select * from t4; +save_master_pos; + +connection slave; +sync_with_master; +select * from t5 /* must be 1 after reconnection */; + +connection master; +drop temporary table t4; +drop table t5; + # The server will now close done -# End of 4.1 tests +# End of 5.0 tests diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 07678d97800..85d93767486 100644 --- a/sql/sql_repl.cc 
+++ b/sql/sql_repl.cc @@ -466,6 +466,12 @@ impossible position"; (rli->group_master_log_pos) */ int4store((char*) packet->ptr()+LOG_POS_OFFSET+1, 0); + /* + if reconnect master sends FD event with `created' as 0 + to avoid destroying temp tables. + */ + int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+ + ST_CREATED_OFFSET+1, (ulong) 0); /* send it */ if (my_net_write(net, (char*)packet->ptr(), packet->length())) { From 612266ec90b309208f8af677757b7c20aefd4830 Mon Sep 17 00:00:00 2001 From: "aivanov@mysql.com" <> Date: Fri, 7 Apr 2006 23:58:17 +0400 Subject: [PATCH 28/36] Fixed BUG#15758: "Holding adaptive search latch in innobase_query_caching_of_table_permitted()". Applied the patch due to Heikki Tuuri. Also removed superfluous #ifdefs. --- sql/sql_cache.cc | 5 +++++ sql/sql_class.cc | 4 ---- sql/sql_cursor.cc | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index cf3ba9c8c40..31201474c05 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -830,6 +830,11 @@ sql mode: 0x%lx, sort len: %lu, conncat len: %lu", flags.sql_mode, flags.max_sort_length, flags.group_concat_max_len)); + /* + Make InnoDB to release the adaptive hash index latch before + acquiring the query cache mutex. 
+ */ + ha_release_temporary_latches(thd); STRUCT_LOCK(&structure_guard_mutex); if (query_cache_size == 0) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 59391a333c3..5a6bbe01183 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -919,14 +919,12 @@ bool select_send::send_data(List &items) return 0; } -#ifdef HAVE_INNOBASE_DB /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ ha_release_temporary_latches(thd); -#endif List_iterator_fast li(items); Protocol *protocol= thd->protocol; @@ -956,12 +954,10 @@ bool select_send::send_data(List &items) bool select_send::send_eof() { -#ifdef HAVE_INNOBASE_DB /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ ha_release_temporary_latches(thd); -#endif /* Unlock tables before sending packet to gain some speed */ if (thd->lock) diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc index 33ad27b9d14..2784e71ccae 100644 --- a/sql/sql_cursor.cc +++ b/sql/sql_cursor.cc @@ -445,9 +445,8 @@ Sensitive_cursor::fetch(ulong num_rows) if (error == NESTED_LOOP_CURSOR_LIMIT) join->resume_nested_loop= TRUE; -#ifdef USING_TRANSACTIONS ha_release_temporary_latches(thd); -#endif + /* Grab free_list here to correctly free it in close */ thd->restore_active_arena(this, &backup_arena); From 010a8a5728bc77ecf66060d81bf5b90e0f356ea3 Mon Sep 17 00:00:00 2001 From: "aelkin@mysql.com" <> Date: Sat, 8 Apr 2006 16:29:11 +0300 Subject: [PATCH 29/36] #18906: pushbuild reports rpl_temporary fails on solaris, AIX, QNX Removing the problematic line from include test file.
--- mysql-test/include/get_binlog_dump_thread_id.inc | 4 +++- mysql-test/r/rpl_temporary.result | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mysql-test/include/get_binlog_dump_thread_id.inc b/mysql-test/include/get_binlog_dump_thread_id.inc index 9efa12c5611..680807cc3c6 100644 --- a/mysql-test/include/get_binlog_dump_thread_id.inc +++ b/mysql-test/include/get_binlog_dump_thread_id.inc @@ -3,7 +3,9 @@ drop table if exists t999; --enable_warnings create temporary table t999 (f int); ---replace_result $MYSQL_TEST_DIR "." +### --replace_result $MYSQL_TEST_DIR "." #failed on solaris etc bug#18906 +--disable_query_log eval LOAD DATA INFILE "$MYSQLTEST_VARDIR/tmp/bl_dump_thread_id" into table t999; +--enable_query_log let $id = `select f from t999`; drop table t999; diff --git a/mysql-test/r/rpl_temporary.result b/mysql-test/r/rpl_temporary.result index 32cd1f16b99..881504f49d0 100644 --- a/mysql-test/r/rpl_temporary.result +++ b/mysql-test/r/rpl_temporary.result @@ -93,7 +93,6 @@ create temporary table t4 (f int); create table t5 (f int); drop table if exists t999; create temporary table t999 (f int); -LOAD DATA INFILE "./var/tmp/bl_dump_thread_id" into table t999; drop table t999; insert into t4 values (1); kill `select id from information_schema.processlist where command='Binlog Dump'`; From 17141582d485fbf8d60212e13881581214d2767a Mon Sep 17 00:00:00 2001 From: "igor@rurik.mysql.com" <> Date: Sat, 8 Apr 2006 11:42:09 -0700 Subject: [PATCH 30/36] Fixed bug #16069. The function agg_cmp_type in item_cmpfunc.cc neglected the fact that the first argument in a BETWEEN/IN predicate could be a field of a view. As a result in the case when the retrieved table was hidden by a view over it and the arguments in the BETWEEN/IN predicates are of the date/time type the function did not perform conversion of the constant arguments to the same format as the first field argument. 
If formats of the arguments differed it caused a wrong evaluation of the predicates. --- mysql-test/r/view.result | 21 +++++++++++++++++++++ mysql-test/t/view.test | 20 ++++++++++++++++++++ sql/item_cmpfunc.cc | 4 ++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result index f0fb35f1a2e..7519b8022f0 100644 --- a/mysql-test/r/view.result +++ b/mysql-test/r/view.result @@ -2579,3 +2579,24 @@ COUNT(*) 2 DROP VIEW v2; DROP TABLE t1, t2; +CREATE TABLE t1 (id int NOT NULL PRIMARY KEY, +td date DEFAULT NULL, KEY idx(td)); +INSERT INTO t1 VALUES +(1, '2005-01-01'), (2, '2005-01-02'), (3, '2005-01-02'), +(4, '2005-01-03'), (5, '2005-01-04'), (6, '2005-01-05'), +(7, '2005-01-05'), (8, '2005-01-05'), (9, '2005-01-06'); +CREATE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM t1 WHERE td BETWEEN '2005.01.02' AND '2005.01.04'; +id td +2 2005-01-02 +3 2005-01-02 +4 2005-01-03 +5 2005-01-04 +SELECT * FROM v1 WHERE td BETWEEN '2005.01.02' AND '2005.01.04'; +id td +2 2005-01-02 +3 2005-01-02 +4 2005-01-03 +5 2005-01-04 +DROP VIEW v1; +DROP TABLE t1; diff --git a/mysql-test/t/view.test b/mysql-test/t/view.test index afeb0dda729..7ef1f82dbd3 100644 --- a/mysql-test/t/view.test +++ b/mysql-test/t/view.test @@ -2434,3 +2434,23 @@ SELECT COUNT(*) FROM t1 LEFT JOIN v2 ON t1.id=v2.id; DROP VIEW v2; DROP TABLE t1, t2; + +# +# Bug #16069: VIEW does not return the same results as underlying SELECT +# with WHERE condition containing BETWEEN over dates + +CREATE TABLE t1 (id int NOT NULL PRIMARY KEY, + td date DEFAULT NULL, KEY idx(td)); + +INSERT INTO t1 VALUES + (1, '2005-01-01'), (2, '2005-01-02'), (3, '2005-01-02'), + (4, '2005-01-03'), (5, '2005-01-04'), (6, '2005-01-05'), + (7, '2005-01-05'), (8, '2005-01-05'), (9, '2005-01-06'); + +CREATE VIEW v1 AS SELECT * FROM t1; + +SELECT * FROM t1 WHERE td BETWEEN '2005.01.02' AND '2005.01.04'; +SELECT * FROM v1 WHERE td BETWEEN '2005.01.02' AND '2005.01.04'; + +DROP VIEW v1; +DROP TABLE t1;
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 24075ac838d..6e1afd4ef09 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -55,8 +55,8 @@ static void agg_cmp_type(THD *thd, Item_result *type, Item **items, uint nitems) bool all_constant= TRUE; /* If the first argument is a FIELD_ITEM, pull out the field. */ - if (items[0]->type() == Item::FIELD_ITEM) - field=((Item_field *)items[0])->field; + if (items[0]->real_item()->type() == Item::FIELD_ITEM) + field=((Item_field *)(items[0]->real_item()))->field; /* But if it can't be compared as a longlong, we don't really care. */ if (field && !field->can_be_compared_as_longlong()) field= NULL; From 4d5ab7a9616da808db67392732566865c1f10827 Mon Sep 17 00:00:00 2001 From: "knielsen@mysql.com" <> Date: Sat, 8 Apr 2006 22:27:43 +0200 Subject: [PATCH 31/36] $MYSQL_TEST was broken with --valgrind. --- mysql-test/mysql-test-run.pl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 53bf820cce9..20bb6e0117a 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2316,11 +2316,6 @@ sub run_mysqltest ($) { mtr_init_args(\$args); - if ( defined $opt_valgrind_mysqltest ) - { - valgrind_arguments($args, \$exe); - } - mtr_add_arg($args, "--no-defaults"); mtr_add_arg($args, "--socket=%s", $master->[0]->{'path_mysock'}); mtr_add_arg($args, "--database=test"); @@ -2398,6 +2393,17 @@ sub run_mysqltest ($) { # Add arguments that should not go into the MYSQL_TEST env var # ---------------------------------------------------------------------- + if ( defined $opt_valgrind_mysqltest ) + { + # Prefix the Valgrind options to the argument list. + # We do this here, since we do not want to Valgrind the nested invocations + # of mysqltest; that would mess up the stderr output causing test failure. 
+ my @args_saved = @$args; + mtr_init_args(\$args); + valgrind_arguments($args, \$exe); + mtr_add_arg($args, "%s", $_) for @args_saved; + } + mtr_add_arg($args, "-R"); mtr_add_arg($args, $tinfo->{'result_file'}); From cbee6eca95779c08f639cc5bc0d343c7dfbd8a83 Mon Sep 17 00:00:00 2001 From: "aelkin@mysql.com" <> Date: Sun, 9 Apr 2006 13:27:53 +0300 Subject: [PATCH 32/36] BUG#18906 Test case rpl_temporary fails when using --vardir option env variable name is fixed. --- mysql-test/include/get_binlog_dump_thread_id.inc | 4 +--- mysql-test/r/rpl_temporary.result | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mysql-test/include/get_binlog_dump_thread_id.inc b/mysql-test/include/get_binlog_dump_thread_id.inc index 680807cc3c6..830a88b5db6 100644 --- a/mysql-test/include/get_binlog_dump_thread_id.inc +++ b/mysql-test/include/get_binlog_dump_thread_id.inc @@ -3,9 +3,7 @@ drop table if exists t999; --enable_warnings create temporary table t999 (f int); -### --replace_result $MYSQL_TEST_DIR "." #failed on solaris etc bug#18906 ---disable_query_log +--replace_result $MYSQLTEST_VARDIR "." 
eval LOAD DATA INFILE "$MYSQLTEST_VARDIR/tmp/bl_dump_thread_id" into table t999; ---enable_query_log let $id = `select f from t999`; drop table t999; diff --git a/mysql-test/r/rpl_temporary.result b/mysql-test/r/rpl_temporary.result index 881504f49d0..12143561854 100644 --- a/mysql-test/r/rpl_temporary.result +++ b/mysql-test/r/rpl_temporary.result @@ -93,6 +93,7 @@ create temporary table t4 (f int); create table t5 (f int); drop table if exists t999; create temporary table t999 (f int); +LOAD DATA INFILE "./tmp/bl_dump_thread_id" into table t999; drop table t999; insert into t4 values (1); kill `select id from information_schema.processlist where command='Binlog Dump'`; From b9d427ca27883cfe2e1e03e5b578d79b02d780b3 Mon Sep 17 00:00:00 2001 From: "knielsen@mysql.com" <> Date: Mon, 10 Apr 2006 09:31:46 +0200 Subject: [PATCH 33/36] Fix test case 'cast' on Windows, different floating point output format. --- mysql-test/t/cast.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/t/cast.test b/mysql-test/t/cast.test index 7e09f44397c..533da542855 100644 --- a/mysql-test/t/cast.test +++ b/mysql-test/t/cast.test @@ -171,6 +171,8 @@ select cast(1.0e+300 as signed int); CREATE TABLE t1 (f1 double); INSERT INTO t1 SET f1 = -1.0e+30 ; INSERT INTO t1 SET f1 = +1.0e+30 ; +# Expected result is +-1e+30, but Windows returns +-1e+030. +--replace_result 1e+030 1e+30 SELECT f1 AS double_val, CAST(f1 AS SIGNED INT) AS cast_val FROM t1; DROP TABLE t1; From bb94bb30d0a3ebe2de24a15327ea2110396ff6bc Mon Sep 17 00:00:00 2001 From: "aivanov@mysql.com" <> Date: Mon, 10 Apr 2006 19:47:41 +0400 Subject: [PATCH 34/36] Fixed BUG#15868: InnoDB hangs under 200 threads making INSERTs: 'thread thrashing'. Applied changes due to Heikki Tuuri.
--- sql/mysqld.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e9ff220a6a1..9dd37bbebc9 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -5531,7 +5531,7 @@ log and this option does nothing anymore.", "Sets the maximum number of threads allowed inside InnoDB. Value 0" " will disable the thread throttling.", (gptr*) &srv_thread_concurrency, (gptr*) &srv_thread_concurrency, - 0, GET_LONG, REQUIRED_ARG, 0, 0, 1000, 0, 1, 0}, + 0, GET_LONG, REQUIRED_ARG, 8, 0, 1000, 0, 1, 0}, {"innodb_thread_sleep_delay", OPT_INNODB_THREAD_SLEEP_DELAY, "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0" " disable a sleep", From 6ad6cda2caa6e681a4a67002f0f2be41f17463bf Mon Sep 17 00:00:00 2001 From: "aivanov@mysql.com" <> Date: Mon, 10 Apr 2006 22:44:01 +0400 Subject: [PATCH 35/36] Fixed test case result (after BUG#15868 fix). --- mysql-test/r/innodb.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index e8af68a6067..c5658d22eb6 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -1821,7 +1821,7 @@ Variable_name Value innodb_sync_spin_loops 20 show variables like "innodb_thread_concurrency"; Variable_name Value -innodb_thread_concurrency 0 +innodb_thread_concurrency 8 set global innodb_thread_concurrency=1001; show variables like "innodb_thread_concurrency"; Variable_name Value From e2feeb88f90356c412312a1fb613613074129444 Mon Sep 17 00:00:00 2001 From: "igor@rurik.mysql.com" <> Date: Mon, 10 Apr 2006 14:14:20 -0700 Subject: [PATCH 36/36] Made the test case for bug #15917 independent of the platform.
--- mysql-test/r/having.result | 2 -- mysql-test/t/having.test | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/having.result b/mysql-test/r/having.result index c827e11e50e..fe918e4c3ff 100644 --- a/mysql-test/r/having.result +++ b/mysql-test/r/having.result @@ -360,8 +360,6 @@ s1 count(s1) y 1 drop table t1; DROP SCHEMA IF EXISTS HU; -Warnings: -Note 1008 Can't drop database 'HU'; database doesn't exist CREATE SCHEMA HU ; USE HU ; CREATE TABLE STAFF diff --git a/mysql-test/t/having.test b/mysql-test/t/having.test index 9e5bc4bc136..9b21e544657 100644 --- a/mysql-test/t/having.test +++ b/mysql-test/t/having.test @@ -354,7 +354,9 @@ drop table t1; # when the server is run on Windows or with --lower-case-table-names=1 # +--disable_warnings DROP SCHEMA IF EXISTS HU; +--enable_warnings CREATE SCHEMA HU ; USE HU ;