Reliably force restart when a subprocess has a fatal error.
Suppose a system call such as bind() fails in the sockd subprocess in request_sockd_fd(). sockd will suicide(). This will send a SIGCHLD to the master process, which the master process should respond to by calling suicide(), forcing a process supervisor to respawn the entire ndhc program. But this doesn't reliably happen prior to this commit because of the interaction between request_sockd_fd() and signalfd() [or, equivalently, the self-pipe trick] signal handling. request_sockd_fd() makes ndhc-master synchronously wait for a response from sockd via safe_recvmsg(). The normal goto-like signal handling path is suppressed when using signalfd(), so when SIGCHLD is received, it will not be handled until I/O is dispatched for the signalfd or pipe. But such code will never be reached because ndhc-master is waiting in safe_recvmsg() and thus never polls the signal fd status. So, revert to using traditional POSIX sigaction() for SIGCHLD, which provides exactly the required behavior for proper functioning.
This commit is contained in:
parent
f0340b1475
commit
8d89ca9f19
29
src/ndhc.c
29
src/ndhc.c
@ -158,17 +158,42 @@ void show_usage(void)
|
|||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Asynchronous signal handler for ndhc-master.
 *
 * signo: number of the signal being delivered.
 *
 * For SIGCHLD, a fixed diagnostic is written to stdout through
 * safe_write() and the process terminates with EXIT_FAILURE.  Any other
 * signal number is ignored and the handler simply returns.
 */
static void signal_handler(int signo)
{
    switch (signo) {
    case SIGCHLD: {
        static const char errstr[] = "ndhc-master: Subprocess terminated unexpectedly. Exiting.";
        safe_write(STDOUT_FILENO, errstr, sizeof errstr - 1);
        /*
         * Use _exit() rather than exit(): exit() runs atexit handlers and
         * flushes stdio buffers, neither of which is async-signal-safe.
         * If SIGCHLD interrupts the main flow while it holds a stdio or
         * allocator lock, exit() could deadlock instead of terminating.
         */
        _exit(EXIT_FAILURE);
    }
    default:
        break;
    }
}
|
||||||
|
|
||||||
static void setup_signals_ndhc(void)
|
static void setup_signals_ndhc(void)
|
||||||
{
|
{
|
||||||
sigset_t mask;
|
sigset_t mask;
|
||||||
sigemptyset(&mask);
|
sigemptyset(&mask);
|
||||||
sigaddset(&mask, SIGUSR1);
|
sigaddset(&mask, SIGUSR1);
|
||||||
sigaddset(&mask, SIGUSR2);
|
sigaddset(&mask, SIGUSR2);
|
||||||
sigaddset(&mask, SIGCHLD);
|
|
||||||
sigaddset(&mask, SIGTERM);
|
sigaddset(&mask, SIGTERM);
|
||||||
sigaddset(&mask, SIGINT);
|
sigaddset(&mask, SIGINT);
|
||||||
if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0)
|
if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0)
|
||||||
suicide("sigprocmask failed");
|
suicide("sigprocmask failed");
|
||||||
|
|
||||||
|
sigemptyset(&mask);
|
||||||
|
sigaddset(&mask, SIGCHLD);
|
||||||
|
if (sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)0) < 0)
|
||||||
|
suicide("sigprocmask failed");
|
||||||
|
struct sigaction sa = {
|
||||||
|
.sa_handler = signal_handler,
|
||||||
|
.sa_flags = SA_RESTART,
|
||||||
|
};
|
||||||
|
sigemptyset(&sa.sa_mask);
|
||||||
|
if (sigaction(SIGCHLD, &sa, NULL))
|
||||||
|
suicide("sigaction failed");
|
||||||
|
|
||||||
if (cs.signalFd >= 0) {
|
if (cs.signalFd >= 0) {
|
||||||
epoll_del(cs.epollFd, cs.signalFd);
|
epoll_del(cs.epollFd, cs.signalFd);
|
||||||
close(cs.signalFd);
|
close(cs.signalFd);
|
||||||
@ -197,8 +222,6 @@ static int signal_dispatch(void)
|
|||||||
switch (si.ssi_signo) {
|
switch (si.ssi_signo) {
|
||||||
case SIGUSR1: return SIGNAL_RENEW;
|
case SIGUSR1: return SIGNAL_RENEW;
|
||||||
case SIGUSR2: return SIGNAL_RELEASE;
|
case SIGUSR2: return SIGNAL_RELEASE;
|
||||||
case SIGCHLD:
|
|
||||||
suicide("ndhc-master: Subprocess terminated unexpectedly. Exiting.");
|
|
||||||
case SIGTERM:
|
case SIGTERM:
|
||||||
log_line("Received SIGTERM. Exiting gracefully.");
|
log_line("Received SIGTERM. Exiting gracefully.");
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user