supervise-daemon: add a --respawn-limit option
Allow limiting the number of times supervise-daemon will attempt to respawn a daemon once it has died to prevent infinite respawning. Also, set a reasonable default limit (10 times in a 5 second period). This is for issue #126.
This commit is contained in:
parent
96c8ba2fb5
commit
3673040722
@ -167,6 +167,12 @@ Display name used for the above defined command.
|
|||||||
Process name to match when signaling the daemon.
|
Process name to match when signaling the daemon.
|
||||||
.It Ar stopsig
|
.It Ar stopsig
|
||||||
Signal to send when stopping the daemon.
|
Signal to send when stopping the daemon.
|
||||||
|
.It Ar respawn_limit
|
||||||
|
Respawn limit
|
||||||
|
.Xr supervise-daemon 8
|
||||||
|
will use for this daemon. See
|
||||||
|
.Xr supervise-daemon 8
|
||||||
|
for more information about this setting.
|
||||||
.It Ar retry
|
.It Ar retry
|
||||||
Retry schedule to use when stopping the daemon. It can either be a
|
Retry schedule to use when stopping the daemon. It can either be a
|
||||||
timeout in seconds or multiple signal/timeout pairs (like SIGTERM/5).
|
timeout in seconds or multiple signal/timeout pairs (like SIGTERM/5).
|
||||||
|
@ -34,6 +34,8 @@
|
|||||||
.Ar user
|
.Ar user
|
||||||
.Fl r , -chroot
|
.Fl r , -chroot
|
||||||
.Ar chrootpath
|
.Ar chrootpath
|
||||||
|
.Fl R , -respawn-limit
|
||||||
|
.Ar limit
|
||||||
.Fl 1 , -stdout
|
.Fl 1 , -stdout
|
||||||
.Ar logfile
|
.Ar logfile
|
||||||
.Fl 2 , -stderr
|
.Fl 2 , -stderr
|
||||||
@ -99,6 +101,24 @@ Modifies the scheduling priority of the daemon.
|
|||||||
.It Fl r , -chroot Ar path
|
.It Fl r , -chroot Ar path
|
||||||
chroot to this directory before starting the daemon. All other paths, such
|
chroot to this directory before starting the daemon. All other paths, such
|
||||||
as the path to the daemon, chdir and pidfile, should be relative to the chroot.
|
as the path to the daemon, chdir and pidfile, should be relative to the chroot.
|
||||||
|
.It Fl R , -respawn-limit Ar limit
|
||||||
|
Control how aggressively
|
||||||
|
.Nm
|
||||||
|
will try to respawn a daemon when it fails to start. The limit argument
|
||||||
|
can be a pair of integers separated by a colon or the string unlimited.
|
||||||
|
.Pp
|
||||||
|
If a pair of integers is given, the first is a maximum number of respawn
|
||||||
|
attempts and the second is a time period. It should be interpreted as:
|
||||||
|
If the daemon dies and has to be respawned more than <first number>
|
||||||
|
times in any time period of <second number> seconds, exit and give up.
|
||||||
|
.Pp
|
||||||
|
For example, the default is 10:5.
|
||||||
|
This means that if the supervisor respawns a daemon more than 10 times
|
||||||
|
in any 5 second period, it gives up and exits.
|
||||||
|
.Pp
|
||||||
|
If unlimited is given as the limit, it means that the supervisor will
|
||||||
|
not exit or give up, no matter how many times the daemon it is
|
||||||
|
supervising needs to be respawned.
|
||||||
.It Fl u , -user Ar user
|
.It Fl u , -user Ar user
|
||||||
Start the daemon as the specified user.
|
Start the daemon as the specified user.
|
||||||
.It Fl 1 , -stdout Ar logfile
|
.It Fl 1 , -stdout Ar logfile
|
||||||
|
@ -25,6 +25,7 @@ supervise_start()
|
|||||||
eval supervise-daemon --start \
|
eval supervise-daemon --start \
|
||||||
${chroot:+--chroot} $chroot \
|
${chroot:+--chroot} $chroot \
|
||||||
${pidfile:+--pidfile} $pidfile \
|
${pidfile:+--pidfile} $pidfile \
|
||||||
|
${respawn_limit:+--respawn-limit} $respawn_limit \
|
||||||
${command_user+--user} $command_user \
|
${command_user+--user} $command_user \
|
||||||
$supervise_daemon_args \
|
$supervise_daemon_args \
|
||||||
$command \
|
$command \
|
||||||
|
@ -66,7 +66,7 @@ static struct pam_conv conv = { NULL, NULL};
|
|||||||
|
|
||||||
const char *applet = NULL;
|
const char *applet = NULL;
|
||||||
const char *extraopts = NULL;
|
const char *extraopts = NULL;
|
||||||
const char *getoptstring = "d:e:g:I:Kk:N:p:r:Su:1:2:" \
|
const char *getoptstring = "d:e:g:I:Kk:N:p:r:R:Su:1:2:" \
|
||||||
getoptstring_COMMON;
|
getoptstring_COMMON;
|
||||||
const struct option longopts[] = {
|
const struct option longopts[] = {
|
||||||
{ "chdir", 1, NULL, 'd'},
|
{ "chdir", 1, NULL, 'd'},
|
||||||
@ -79,6 +79,7 @@ const struct option longopts[] = {
|
|||||||
{ "pidfile", 1, NULL, 'p'},
|
{ "pidfile", 1, NULL, 'p'},
|
||||||
{ "user", 1, NULL, 'u'},
|
{ "user", 1, NULL, 'u'},
|
||||||
{ "chroot", 1, NULL, 'r'},
|
{ "chroot", 1, NULL, 'r'},
|
||||||
|
{ "respawn-limit", 1, NULL, 'R'},
|
||||||
{ "start", 0, NULL, 'S'},
|
{ "start", 0, NULL, 'S'},
|
||||||
{ "stdout", 1, NULL, '1'},
|
{ "stdout", 1, NULL, '1'},
|
||||||
{ "stderr", 1, NULL, '2'},
|
{ "stderr", 1, NULL, '2'},
|
||||||
@ -95,6 +96,7 @@ const char * const longopts_help[] = {
|
|||||||
"Match pid found in this file",
|
"Match pid found in this file",
|
||||||
"Change the process user",
|
"Change the process user",
|
||||||
"Chroot to this directory",
|
"Chroot to this directory",
|
||||||
|
"set a respawn limit",
|
||||||
"Start daemon",
|
"Start daemon",
|
||||||
"Redirect stdout to file",
|
"Redirect stdout to file",
|
||||||
"Redirect stderr to file",
|
"Redirect stderr to file",
|
||||||
@ -424,7 +426,13 @@ int main(int argc, char **argv)
|
|||||||
char *p;
|
char *p;
|
||||||
char *token;
|
char *token;
|
||||||
int i;
|
int i;
|
||||||
|
int n;
|
||||||
char exec_file[PATH_MAX];
|
char exec_file[PATH_MAX];
|
||||||
|
int respawn_count = 0;
|
||||||
|
int respawn_max = 10;
|
||||||
|
int respawn_period = 5;
|
||||||
|
time_t respawn_now= 0;
|
||||||
|
time_t first_spawn= 0;
|
||||||
struct passwd *pw;
|
struct passwd *pw;
|
||||||
struct group *gr;
|
struct group *gr;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
@ -519,6 +527,17 @@ int main(int argc, char **argv)
|
|||||||
ch_root = optarg;
|
ch_root = optarg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'R': /* --respawn-limit unlimited|count:period */
|
||||||
|
if (strcasecmp(optarg, "unlimited") == 0) {
|
||||||
|
respawn_max = 0;
|
||||||
|
respawn_period = 0;
|
||||||
|
} else {
|
||||||
|
n = sscanf(optarg, "%d:%d", &respawn_max, &respawn_period);
|
||||||
|
if (n != 2 || respawn_max < 1 || respawn_period < 1)
|
||||||
|
eerrorx("Invalid respawn-limit setting '%s'", optarg);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case 'u': /* --user <username>|<uid> */
|
case 'u': /* --user <username>|<uid> */
|
||||||
{
|
{
|
||||||
p = optarg;
|
p = optarg;
|
||||||
@ -713,6 +732,22 @@ int main(int argc, char **argv)
|
|||||||
syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid);
|
syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid);
|
||||||
kill(child_pid, SIGTERM);
|
kill(child_pid, SIGTERM);
|
||||||
} else {
|
} else {
|
||||||
|
if (respawn_max > 0 && respawn_period > 0) {
|
||||||
|
respawn_now = time(NULL);
|
||||||
|
if (first_spawn == 0)
|
||||||
|
first_spawn = respawn_now;
|
||||||
|
if (respawn_now - first_spawn > respawn_period) {
|
||||||
|
respawn_count = 0;
|
||||||
|
first_spawn = 0;
|
||||||
|
} else
|
||||||
|
respawn_count++;
|
||||||
|
if (respawn_count >= respawn_max) {
|
||||||
|
syslog(LOG_INFO, "respawned \"%s\" too many times, "
|
||||||
|
"exiting", exec);
|
||||||
|
exiting = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (WIFEXITED(i))
|
if (WIFEXITED(i))
|
||||||
syslog(LOG_INFO, "%s, pid %d, exited with return code %d",
|
syslog(LOG_INFO, "%s, pid %d, exited with return code %d",
|
||||||
exec, child_pid, WEXITSTATUS(i));
|
exec, child_pid, WEXITSTATUS(i));
|
||||||
|
Loading…
Reference in New Issue
Block a user