Fix #36: retry DNS lookup of remote syslog servers with res_init()
This patch replaces the INET_SUSPEND_TIME for DNS lookup with a 5 sec back-off to prevent DNS lookup on each message. Also, reorder WARN() and NOTE() so they are called *after* setting the f_type, otherwise we unleash endless recursive loops. To avoid filling up the log with "Failed resolving ..." messages every time we retry, we set a flag to remember we've already logged warning. Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
This commit is contained in:
parent
0a0380cbdd
commit
f516ff6023
@ -170,6 +170,7 @@ void wallmsg(struct filed *f, struct iovec *iov, int iovcnt);
|
||||
void reapchild();
|
||||
const char *cvtaddr(struct sockaddr_storage *f, int len);
|
||||
const char *cvthname(struct sockaddr *f, socklen_t len);
|
||||
static void forw_lookup(struct filed *f);
|
||||
void domark(void *arg);
|
||||
void doflush(void *arg);
|
||||
void debug_switch();
|
||||
@ -700,6 +701,9 @@ static int nslookup(const char *host, const char *service, struct addrinfo **ai)
|
||||
if (!node || !node[0])
|
||||
node = NULL;
|
||||
|
||||
/* Reset resolver cache and retry name lookup */
|
||||
res_init();
|
||||
|
||||
logit("nslookup '%s:%s'\n", node ?: "none", service);
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
hints.ai_flags = !node ? AI_PASSIVE : 0;
|
||||
@ -1579,7 +1583,10 @@ void fprintlog_write(struct filed *f, struct iovec *iov, int iovcnt, int flags)
|
||||
break;
|
||||
|
||||
case F_FORW_UNKN:
|
||||
/* nslookup retry handled by domark() timer */
|
||||
logit("\n");
|
||||
forw_lookup(f);
|
||||
if (f->f_type == F_FORW)
|
||||
goto f_forw;
|
||||
break;
|
||||
|
||||
case F_FORW:
|
||||
@ -1839,7 +1846,7 @@ static void fprintlog_first(struct filed *f, struct buf_msg *buffer)
|
||||
/* Messages generated by syslogd itself may not have a timestamp */
|
||||
check_timestamp(buffer);
|
||||
|
||||
if (f->f_type != F_FORW_SUSP) {
|
||||
if (f->f_type != F_FORW_SUSP && f->f_type != F_FORW_UNKN) {
|
||||
f->f_time = timer_now();
|
||||
f->f_prevcount = 0;
|
||||
}
|
||||
@ -2090,39 +2097,42 @@ void flog(int pri, char *fmt, ...)
|
||||
|
||||
static void forw_lookup(struct filed *f)
|
||||
{
|
||||
struct addrinfo *ai;
|
||||
time_t diff;
|
||||
char *host = f->f_un.f_forw.f_hname;
|
||||
char *serv = f->f_un.f_forw.f_serv;
|
||||
struct addrinfo *ai;
|
||||
int err, first;
|
||||
time_t diff;
|
||||
|
||||
/* Called from cfline() for initial lookup? */
|
||||
first = f->f_type == F_UNUSED ? 1 : 0;
|
||||
|
||||
/*
|
||||
* Not INET_SUSPEND_TIME, but back off a few seconds at least
|
||||
* to prevent syslogd from hammering the resolver for every
|
||||
* little message that is logged. E.g., at boot when we read
|
||||
* the kernel ring buffer.
|
||||
*/
|
||||
diff = timer_now() - f->f_time;
|
||||
if (!first && diff < INET_SUSPEND_TIME) {
|
||||
logit("Forwarding suspension not over, time left: %d\n",
|
||||
(int)(INET_SUSPEND_TIME - diff));
|
||||
if (!first && diff < 5)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!first)
|
||||
logit("Forwarding suspension to %s:%s over, retrying\n", host, serv);
|
||||
|
||||
err = nslookup(host, serv, &ai);
|
||||
if (err) {
|
||||
WARN("Failed resolving '%s:%s': %s", host, serv, gai_strerror(err));
|
||||
f->f_type = F_FORW_UNKN;
|
||||
f->f_time = timer_now();
|
||||
if (!first && !(f->f_flags & SUSP_RETR))
|
||||
WARN("Failed resolving '%s:%s': %s", host, serv, gai_strerror(err));
|
||||
f->f_flags |= SUSP_RETR; /* Retry silently */
|
||||
return;
|
||||
}
|
||||
|
||||
if (!first)
|
||||
NOTE("Successfully resolved '%s:%s', resuming operation.", host, serv);
|
||||
|
||||
f->f_flags &= ~SUSP_RETR;
|
||||
f->f_type = F_FORW;
|
||||
f->f_un.f_forw.f_addr = ai;
|
||||
f->f_prevcount = 0;
|
||||
|
||||
if (!first)
|
||||
NOTE("Successfully resolved '%s:%s', initiating forwarding.", host, serv);
|
||||
}
|
||||
|
||||
void domark(void *arg)
|
||||
@ -2144,7 +2154,8 @@ void doflush(void *arg)
|
||||
SIMPLEQ_FOREACH(f, &fhead, f_link) {
|
||||
if (f->f_type == F_FORW_UNKN) {
|
||||
forw_lookup(f);
|
||||
continue;
|
||||
if (f->f_type != F_FORW)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (f->f_prevcount && timer_now() >= REPEATTIME(f)) {
|
||||
|
@ -183,6 +183,7 @@
|
||||
#define MARK 0x008 /* this message is a mark */
|
||||
#define RFC3164 0x010 /* format log message according to RFC 3164 */
|
||||
#define RFC5424 0x020 /* format log message according to RFC 5424 */
|
||||
#define SUSP_RETR 0x040 /* suspend/forw_unkn, retrying nslookup */
|
||||
|
||||
/* Syslog timestamp formats. */
|
||||
#define BSDFMT_DATELEN 0
|
||||
|
Loading…
Reference in New Issue
Block a user