Bug #40399 Please make mtr print stack trace after every failure
SIGABRT is sent to relevant processes after a timeout.
client/mysqltest.cc: Fixed signal handlers so mysqltest actually dumps core.
mysql-test/lib/My/CoreDump.pm: Added support for dbx.
mysql-test/lib/My/SafeProcess.pm: Added dump_core to force a process to dump core.
mysql-test/lib/My/SafeProcess/safe_process.cc: Traps SIGABRT and sends it on to the child.
mysql-test/mysql-test-run.pl: When a test times out, force core dumps on mysqltest and servers.
This commit is contained in:
parent
bb42e1ab05
commit
089663f9a7
@ -7326,6 +7326,13 @@ static sig_handler signal_handler(int sig)
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "mysqltest got " SIGNAL_FMT "\n", sig);
|
fprintf(stderr, "mysqltest got " SIGNAL_FMT "\n", sig);
|
||||||
dump_backtrace();
|
dump_backtrace();
|
||||||
|
|
||||||
|
fprintf(stderr, "Writing a core file...\n");
|
||||||
|
fflush(stderr);
|
||||||
|
my_write_core(sig);
|
||||||
|
#ifndef __WIN__
|
||||||
|
exit(1); // Shouldn't get here but just in case
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __WIN__
|
#ifdef __WIN__
|
||||||
|
@ -49,12 +49,56 @@ sub _gdb {
|
|||||||
|
|
||||||
unlink $tmp_name or die "Error removing $tmp_name: $!";
|
unlink $tmp_name or die "Error removing $tmp_name: $!";
|
||||||
|
|
||||||
|
return if $? >> 8;
|
||||||
return unless $gdb_output;
|
return unless $gdb_output;
|
||||||
|
|
||||||
print <<EOF, $gdb_output, "\n";
|
print <<EOF, $gdb_output, "\n";
|
||||||
Output from gdb follows. The first stack trace is from the failing thread.
|
Output from gdb follows. The first stack trace is from the failing thread.
|
||||||
The following stack traces are from all threads (so the failing one is
|
The following stack traces are from all threads (so the failing one is
|
||||||
duplicated).
|
duplicated).
|
||||||
|
--------------------------
|
||||||
|
EOF
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#
# Try to print a stack trace for every thread recorded in a core file,
# using the 'dbx' debugger.
#
# Arguments:
#   $core_name  path of the core file to examine
#
# Returns 1 when dbx produced output that was printed; returns nothing
# (false) when the core file does not exist, the generating binary could
# not be determined, dbx exited with a non-zero status, or dbx produced
# no output.
#
# Dies if the temporary command file cannot be closed or removed.
#
sub _dbx {
  my ($core_name)= @_;

  print "\nTrying 'dbx' to get a backtrace\n";

  return unless -f $core_name;

  # Find out name of binary that generated core.
  # Take the capture from the match's return value instead of reading $1:
  # a failed match leaves $1 untouched, so $1 could still hold a capture
  # from an unrelated earlier match (e.g. when dbx is not installed).
  my ($binary)=
    `echo | dbx - '$core_name' 2>&1` =~
      /Corefile specified executable: "([^"]+)"/;
  return unless $binary;
  print "Core generated by '$binary'\n";

  # Find all threads (dbx thread ids look like t@<number>)
  my @thr_ids = `echo threads | dbx '$binary' '$core_name' 2>&1` =~ /t\@\d+/g;

  # Create tempfile containing dbx commands: one 'where' per thread
  my ($tmp, $tmp_name) = tempfile();
  foreach my $thread (@thr_ids) {
    print $tmp "where $thread\n";
  }
  print $tmp "exit\n";
  close $tmp or die "Error closing $tmp_name: $!";

  # Run dbx
  my $dbx_output=
    `cat '$tmp_name' | dbx '$binary' '$core_name' 2>&1`;

  # Remember the exit status of the dbx pipeline right away, before any
  # other operation gets a chance to touch $?
  my $dbx_status= $? >> 8;

  unlink $tmp_name or die "Error removing $tmp_name: $!";

  return if $dbx_status;
  return unless $dbx_output;

  print <<EOF, $dbx_output, "\n";
Output from dbx follows. Stack trace is printed for all threads in order,
above this you should see info about which thread was the failing one.
----------------------------
EOF
  return 1;
}
|
||||||
@ -63,12 +107,18 @@ EOF
|
|||||||
sub show {
|
sub show {
|
||||||
my ($class, $core_name)= @_;
|
my ($class, $core_name)= @_;
|
||||||
|
|
||||||
|
# We try dbx first; gdb itself may coredump if run on a Sun Studio
|
||||||
|
# compiled binary on Solaris.
|
||||||
|
|
||||||
my @debuggers =
|
my @debuggers =
|
||||||
(
|
(
|
||||||
|
\&_dbx,
|
||||||
\&_gdb,
|
\&_gdb,
|
||||||
# TODO...
|
# TODO...
|
||||||
);
|
);
|
||||||
|
|
||||||
|
# Try debuggers until one succeeds
|
||||||
|
|
||||||
foreach my $debugger (@debuggers){
|
foreach my $debugger (@debuggers){
|
||||||
if ($debugger->($core_name)){
|
if ($debugger->($core_name)){
|
||||||
return;
|
return;
|
||||||
|
@ -349,13 +349,24 @@ sub start_kill {
|
|||||||
{
|
{
|
||||||
$pid= $self->{SAFE_PID};
|
$pid= $self->{SAFE_PID};
|
||||||
die "Can't kill not started process" unless defined $pid;
|
die "Can't kill not started process" unless defined $pid;
|
||||||
$ret= kill(15, $pid);
|
$ret= kill("TERM", $pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#
# Ask the process to dump core by sending SIGABRT to its safe_process
# wrapper (the pid stored in SAFE_PID).
#
# Does nothing and returns false when IS_WINDOWS is true.
#
# Returns the result of kill(): true when the signal was delivered,
# false when it could not be, so callers do not wait for a core dump
# that was never requested.
#
# Dies if the process has not been started (no SAFE_PID).
#
sub dump_core {
  my ($self)= @_;
  return if IS_WINDOWS;

  my $pid= $self->{SAFE_PID};
  die "Can't get core from not started process" unless defined $pid;

  _verbose("Sending ABRT to $self");

  # kill() returns the number of processes successfully signalled
  return kill("ABRT", $pid);
}
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Kill the process as fast as possible
|
# Kill the process as fast as possible
|
||||||
# and wait for it to return
|
# and wait for it to return
|
||||||
|
@ -117,6 +117,16 @@ static void kill_child (void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void handle_abort (int sig)
|
||||||
|
{
|
||||||
|
message("Got signal %d, child_pid: %d, sending ABRT", sig, child_pid);
|
||||||
|
|
||||||
|
if (child_pid > 0) {
|
||||||
|
kill (-child_pid, SIGABRT); // Don't wait for it to terminate
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void handle_signal (int sig)
|
static void handle_signal (int sig)
|
||||||
{
|
{
|
||||||
message("Got signal %d, child_pid: %d", sig, child_pid);
|
message("Got signal %d, child_pid: %d", sig, child_pid);
|
||||||
@ -144,6 +154,7 @@ int main(int argc, char* const argv[] )
|
|||||||
signal(SIGTERM, handle_signal);
|
signal(SIGTERM, handle_signal);
|
||||||
signal(SIGINT, handle_signal);
|
signal(SIGINT, handle_signal);
|
||||||
signal(SIGCHLD, handle_signal);
|
signal(SIGCHLD, handle_signal);
|
||||||
|
signal(SIGABRT, handle_abort);
|
||||||
|
|
||||||
sprintf(safe_process_name, "safe_process[%d]", own_pid);
|
sprintf(safe_process_name, "safe_process[%d]", own_pid);
|
||||||
|
|
||||||
|
@ -464,7 +464,11 @@ sub run_test_server ($$$) {
|
|||||||
else {
|
else {
|
||||||
mtr_report(" - saving '$worker_savedir/' to '$savedir/'");
|
mtr_report(" - saving '$worker_savedir/' to '$savedir/'");
|
||||||
rename($worker_savedir, $savedir);
|
rename($worker_savedir, $savedir);
|
||||||
|
# Move any core files from e.g. mysqltest
|
||||||
|
foreach my $coref (glob("core*"))
|
||||||
|
{
|
||||||
|
move($coref, $savedir);
|
||||||
|
}
|
||||||
if ($opt_max_save_core > 0) {
|
if ($opt_max_save_core > 0) {
|
||||||
# Limit number of core files saved
|
# Limit number of core files saved
|
||||||
find({ no_chdir => 1,
|
find({ no_chdir => 1,
|
||||||
@ -2381,7 +2385,7 @@ sub kill_leftovers ($) {
|
|||||||
}
|
}
|
||||||
mtr_report(" - found old pid $pid in '$elem', killing it...");
|
mtr_report(" - found old pid $pid in '$elem', killing it...");
|
||||||
|
|
||||||
my $ret= kill(9, $pid);
|
my $ret= kill("KILL", $pid);
|
||||||
if ($ret == 0) {
|
if ($ret == 0) {
|
||||||
mtr_report(" process did not exist!");
|
mtr_report(" process did not exist!");
|
||||||
next;
|
next;
|
||||||
@ -3283,11 +3287,6 @@ sub run_testcase ($) {
|
|||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
$test_timeout_proc->kill();
|
$test_timeout_proc->kill();
|
||||||
|
|
||||||
# ----------------------------------------------------
|
|
||||||
# It's not mysqltest that has exited, kill it
|
|
||||||
# ----------------------------------------------------
|
|
||||||
$test->kill();
|
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
# Check if it was a server that died
|
# Check if it was a server that died
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
@ -3297,10 +3296,30 @@ sub run_testcase ($) {
|
|||||||
$tinfo->{comment}=
|
$tinfo->{comment}=
|
||||||
"Server $proc failed during test run";
|
"Server $proc failed during test run";
|
||||||
|
|
||||||
|
# ----------------------------------------------------
|
||||||
|
# It's not mysqltest that has exited, kill it
|
||||||
|
# ----------------------------------------------------
|
||||||
|
$test->kill();
|
||||||
|
|
||||||
report_failure_and_restart($tinfo);
|
report_failure_and_restart($tinfo);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Try to dump core for mysqltest and all servers
|
||||||
|
foreach my $proc ($test, started(all_servers()))
|
||||||
|
{
|
||||||
|
mtr_print("Trying to dump core for $proc");
|
||||||
|
if ($proc->dump_core())
|
||||||
|
{
|
||||||
|
$proc->wait_one(20);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ----------------------------------------------------
|
||||||
|
# It's not mysqltest that has exited, kill it
|
||||||
|
# ----------------------------------------------------
|
||||||
|
$test->kill();
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
# Check if testcase timer expired
|
# Check if testcase timer expired
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
@ -3319,6 +3338,7 @@ sub run_testcase ($) {
|
|||||||
}
|
}
|
||||||
$tinfo->{'timeout'}= testcase_timeout(); # Mark as timeout
|
$tinfo->{'timeout'}= testcase_timeout(); # Mark as timeout
|
||||||
run_on_all($tinfo, 'analyze-timeout');
|
run_on_all($tinfo, 'analyze-timeout');
|
||||||
|
|
||||||
report_failure_and_restart($tinfo);
|
report_failure_and_restart($tinfo);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -4222,7 +4242,7 @@ sub start_servers($) {
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mysql_install_db($mysqld);
|
mysql_install_db($mysqld); # For versional testing
|
||||||
|
|
||||||
mtr_error("Failed to install system db to '$datadir'")
|
mtr_error("Failed to install system db to '$datadir'")
|
||||||
unless -d $datadir;
|
unless -d $datadir;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user