From a609410829b434e287fcd136b13ca0ddf06282f5 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 13 Mar 2007 11:29:14 +0100 Subject: [PATCH 1/4] ndb - bug#27003 Handle random(not in order) LQHKEYREQ failures during node-restart ndb/src/kernel/blocks/ERROR_codes.txt: Document new error codes ndb/src/kernel/blocks/dblqh/DblqhMain.cpp: Handle random(not in order) LQHKEYREQ failures during node-restart ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp: Error codes for various oom problems ndb/src/kernel/blocks/dbtup/DbtupGen.cpp: move CLEAR_ERROR_INSERT_VALUE to constructor so that it's reasonable to use it for restart testing ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp: Add error insert for CopyFragRef ndb/test/ndbapi/testNodeRestart.cpp: Testprg for bug#27003 ndb/test/run-test/daily-basic-tests.txt: add testprg --- ndb/src/kernel/blocks/ERROR_codes.txt | 12 ++++ ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 19 ++++++- .../kernel/blocks/dbtup/DbtupExecQuery.cpp | 24 ++++++++ ndb/src/kernel/blocks/dbtup/DbtupGen.cpp | 2 +- ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 1 + ndb/test/ndbapi/testNodeRestart.cpp | 56 +++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 ++ 7 files changed, 114 insertions(+), 4 deletions(-) diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index f7cb49014cb..ed35db91738 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -489,3 +489,15 @@ Dbdict: 6003 Crash in participant @ CreateTabReq::Prepare 6004 Crash in participant @ CreateTabReq::Commit 6005 Crash in participant @ CreateTabReq::CreateDrop + +TUP: +---- + +4025: Fail all inserts with out of memory +4026: Fail one insert with oom +4027: Fail inserts randomly with oom +4028: Fail one random insert with oom + +NDBCNTR: + +1000: Crash insertion on SystemError::CopyFragRef diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 698e5ac292c..5847e1063aa 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -9641,6 +9641,15 @@ void Dblqh::copyCompletedLab(Signal* signal) closeCopyLab(signal); return; }//if + + if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY && + scanptr.p->scanErrorCounter) + { + jam(); + closeCopyLab(signal); + return; + } + if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY) { jam(); /*---------------------------------------------------------------------------*/ @@ -9717,13 +9726,16 @@ void Dblqh::continueCopyAfterBlockedLab(Signal* signal) void Dblqh::copyLqhKeyRefLab(Signal* signal) { ndbrequire(tcConnectptr.p->transid[1] == signal->theData[4]); - tcConnectptr.p->copyCountWords -= signal->theData[3]; + Uint32 copyWords = signal->theData[3]; scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); scanptr.p->scanErrorCounter++; tcConnectptr.p->errorCode = terrorCode; - closeCopyLab(signal); - return; + + LqhKeyConf* conf = (LqhKeyConf*)signal->getDataPtrSend(); + conf->transId1 = copyWords; + conf->transId2 = tcConnectptr.p->transid[1]; + copyCompletedLab(signal); }//Dblqh::copyLqhKeyRefLab() void Dblqh::closeCopyLab(Signal* signal) @@ -9734,6 +9746,7 @@ void Dblqh::closeCopyLab(Signal* signal) // Wait until all of those have arrived until we start the // close process. /*---------------------------------------------------------------------------*/ + scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY; jam(); return; }//if diff --git a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 9917ac4e340..15ce54e594c 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -213,6 +213,30 @@ void Dbtup::execTUP_ALLOCREQ(Signal* signal) //--------------------------------------------------- PagePtr pagePtr; Uint32 pageOffset; + + if (ERROR_INSERTED(4025)) + { + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4026)) + { + CLEAR_ERROR_INSERT_VALUE; + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4027) && (rand() % 100) > 25) + { + signal->theData[0] = 827; + return; + } + if (ERROR_INSERTED(4028) && (rand() % 100) > 25) + { + CLEAR_ERROR_INSERT_VALUE; + signal->theData[0] = 827; + return; + } + if (!allocTh(regFragPtr.p, regTabPtr.p, NORMAL_PAGE, diff --git a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index 6b1056fdeac..f5b4e1fb944 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -66,6 +66,7 @@ void Dbtup::initData() undoPage = 0; totNoOfPagesAllocated = 0; cnoOfAllocatedPages = 0; + CLEAR_ERROR_INSERT_VALUE; // Records with constant sizes }//Dbtup::initData() @@ -570,7 +571,6 @@ void Dbtup::execSTTOR(Signal* signal) switch (startPhase) { case ZSTARTPHASE1: ljam(); - CLEAR_ERROR_INSERT_VALUE; cownref = calcTupBlockRef(0); break; default: diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index a9039385805..c05f04d700c 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -180,6 +180,7 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) break; case SystemError::CopyFragRefError: + CRASH_INSERTION(1000); BaseString::snprintf(buf, sizeof(buf), "Killed by node %d as " "copyfrag failed, error: %u", diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index c8c8ddd88d6..e8f8ac66f74 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -1125,6 +1125,59 @@ runBug26481(NDBT_Context* ctx, NDBT_Step* step) return NDBT_OK; } +int +runBug27003(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter res; + + static const int errnos[] = { 4025, 4026, 4027, 4028, 0 }; + + int node = res.getRandomNotMasterNodeId(rand()); + ndbout_c("node: %d", node); + if (res.restartOneDbNode(node, false, true, true)) + return NDBT_FAILED; + + Uint32 pos = 0; + for (Uint32 i = 0; i Date: Thu, 15 Mar 2007 11:05:12 +0100 Subject: [PATCH 2/4] ndb - fix bug in UtilTransactions::compare reset rowcount on temporary error during scan of base table --- ndb/test/src/UtilTransactions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ndb/test/src/UtilTransactions.cpp b/ndb/test/src/UtilTransactions.cpp index 31c323045ed..c3941bda5dd 100644 --- a/ndb/test/src/UtilTransactions.cpp +++ b/ndb/test/src/UtilTransactions.cpp @@ -1382,6 +1382,7 @@ UtilTransactions::compare(Ndb* pNdb, const char* tab_name2, int flags){ goto error; } + row_count= 0; { int eof; while((eof = pOp->nextResult(true)) == 0) From 54edcb1861998256a857773fd6385861061f3118 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 20 Mar 2007 16:08:39 +0100 Subject: [PATCH 3/4] ndb - bug#27291 Fix correct min-value for LockPagesInMemory ndb/src/mgmsrv/ConfigInfo.cpp: Fix correct min-value --- ndb/src/mgmsrv/ConfigInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ndb/src/mgmsrv/ConfigInfo.cpp b/ndb/src/mgmsrv/ConfigInfo.cpp index 7f89f5c5c49..7f220c0da65 100644 --- a/ndb/src/mgmsrv/ConfigInfo.cpp +++ b/ndb/src/mgmsrv/ConfigInfo.cpp @@ -566,7 +566,7 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = { true, ConfigInfo::CI_INT, "0", - "1", + "0", "2" }, { From 26afc93a0e4154b797adb0bcb0c85cf7f4f541e1 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 20 Mar 2007 16:16:25 +0100 Subject: [PATCH 4/4] ndb - bug#27283 (wl2325-5.0) Handle race condtition between MASTER_GCPCONF and execGCP_NODEFINISH ndb/src/kernel/blocks/ERROR_codes.txt: new error codes ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Handle race condtition between MASTER_GCPCONF and execGCP_NODEFINISH ndb/test/ndbapi/testNodeRestart.cpp: testcase ndb/test/run-test/daily-basic-tests.txt: testcase --- ndb/src/kernel/blocks/ERROR_codes.txt | 2 +- ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 32 ++++++++++++++-- ndb/test/ndbapi/testNodeRestart.cpp | 45 +++++++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 ++ 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index ed35db91738..9c2b441e7be 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -5,7 +5,7 @@ Next DBACC 3002 Next DBTUP 4014 Next DBLQH 5043 Next DBDICT 6007 -Next DBDIH 7181 +Next DBDIH 7183 Next DBTC 8039 Next CMVMI 9000 Next BACKUP 10022 diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index bf71bc56723..30749cb5a05 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -4811,6 +4811,15 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal) } else { ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING); }//if + + if (ERROR_INSERTED(7181)) + { + ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ"); + CLEAR_ERROR_INSERT_VALUE; + signal->theData[1] = coldgcp; + execGCP_TCFINISHED(signal); + } + MasterGCPConf::State gcpState; switch (cgcpParticipantState) { case GCP_PARTICIPANT_READY: @@ -4877,6 +4886,14 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal) masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i]; sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal, MasterGCPConf::SignalLength, JBB); + + if (ERROR_INSERTED(7182)) + { + ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ"); + CLEAR_ERROR_INSERT_VALUE; + signal->theData[1] = coldgcp; + execGCP_TCFINISHED(signal); + } }//Dbdih::execMASTER_GCPREQ() void Dbdih::execMASTER_GCPCONF(Signal* signal) @@ -7542,10 +7559,10 @@ void Dbdih::execGCP_NODEFINISH(Signal* signal) } else if (cmasterState == MASTER_TAKE_OVER_GCP) { jam(); //------------------------------------------------------------- - // We are currently taking over as master. We will delay the - // signal until we have completed the take over gcp handling. + // We are currently taking over as master. Ignore + // signal in this case since we will discover it in reception of + // MASTER_GCPCONF. //------------------------------------------------------------- - sendSignalWithDelay(reference(), GSN_GCP_NODEFINISH, signal, 20, 3); return; } else { ndbrequire(cmasterState == MASTER_ACTIVE); @@ -7692,6 +7709,15 @@ void Dbdih::execGCP_TCFINISHED(Signal* signal) Uint32 gci = signal->theData[1]; ndbrequire(gci == coldgcp); + if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182)) + { + ndbout_c("killing %d", refToNode(cmasterdihref)); + signal->theData[0] = 9999; + sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)), + GSN_NDB_TAMPER, signal, 1, JBB); + return; + } + cgcpParticipantState = GCP_PARTICIPANT_TC_FINISHED; signal->theData[0] = cownNodeId; signal->theData[1] = coldgcp; diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index e8f8ac66f74..dfb48ea3657 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -1178,6 +1178,48 @@ runBug27003(NDBT_Context* ctx, NDBT_Step* step) } +int +runBug27283(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter res; + + if (res.getNumDbNodes() < 2) + { + return NDBT_OK; + } + + static const int errnos[] = { 7181, 7182, 0 }; + + Uint32 pos = 0; + for (Uint32 i = 0; i