From 8d89ca9f19643648729f2e43834960ea7fedf139 Mon Sep 17 00:00:00 2001 From: "Nicholas J. Kain" Date: Tue, 20 Oct 2020 01:12:59 -0400 Subject: [PATCH] Reliably force restart when a subprocess has a fatal error. Suppose a system call such as bind() fails in the sockd subprocess in request_sockd_fd(). sockd will suicide(). This will send a SIGCHLD to the master process, which the master process should respond to by calling suicide(), forcing a process supervisor to respawn the entire ndhc program. But, this doesn't reliably happen prior to this commit because of the interaction between request_sockd_fd() and signalfd() [or equivalently self-pipe-trick] signal handling. request_sockd_fd() makes ndhc-master synchronously wait for a response from sockd via safe_recvmsg(). The normal goto-like signal handling path is suppressed when using signalfd(), so when SIGCHLD is received, it will not be handled until io is dispatched for the signalfd or pipe. But such code will never be reached because ndhc-master is waiting in safe_recvmsg() and thus never polls signal fd status. So, revert to using traditional POSIX sigaction() for SIGCHLD, which provides exactly the required behavior for proper functioning. --- src/ndhc.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/ndhc.c b/src/ndhc.c index e7ffaa8..7425cd9 100644 --- a/src/ndhc.c +++ b/src/ndhc.c @@ -158,17 +158,42 @@ void show_usage(void) exit(EXIT_SUCCESS); } +static void signal_handler(int signo) +{ + switch (signo) { + case SIGCHLD: { + static const char errstr[] = "ndhc-master: Subprocess terminated unexpectedly. 
Exiting."; + safe_write(STDOUT_FILENO, errstr, sizeof errstr - 1); + exit(EXIT_FAILURE); + } + default: + break; + } +} + static void setup_signals_ndhc(void) { sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGUSR1); sigaddset(&mask, SIGUSR2); - sigaddset(&mask, SIGCHLD); sigaddset(&mask, SIGTERM); sigaddset(&mask, SIGINT); if (sigprocmask(SIG_BLOCK, &mask, (sigset_t *)0) < 0) suicide("sigprocmask failed"); + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + if (sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)0) < 0) + suicide("sigprocmask failed"); + struct sigaction sa = { + .sa_handler = signal_handler, + .sa_flags = SA_RESTART, + }; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGCHLD, &sa, NULL)) + suicide("sigaction failed"); + if (cs.signalFd >= 0) { epoll_del(cs.epollFd, cs.signalFd); close(cs.signalFd); @@ -197,8 +222,6 @@ static int signal_dispatch(void) switch (si.ssi_signo) { case SIGUSR1: return SIGNAL_RENEW; case SIGUSR2: return SIGNAL_RELEASE; - case SIGCHLD: - suicide("ndhc-master: Subprocess terminated unexpectedly. Exiting."); case SIGTERM: log_line("Received SIGTERM. Exiting gracefully."); exit(EXIT_SUCCESS);