Handle carrier interruptions in arp_collision_timeout() better.

Still not ideal; we need to note and retry these errors, but these
changes are preparatory and do not introduce regressions.
This commit is contained in:
Nicholas J. Kain 2017-02-24 05:36:05 -05:00
parent 09d5e9ad3c
commit 4d87d5075a
3 changed files with 54 additions and 13 deletions

View File

@ -484,6 +484,33 @@ int arp_gw_query_timeout(struct client_state_t cs[static 1], long long nowts)
return ARPR_OK;
}
// Failure here is difficult to handle gracefully, as we do have a valid
// IP but have not yet announced it to other hosts on our ethernet
// segment.  We try to do so for one minute.  We must measure the time
// directly so that carrier loss or suspend are handled properly.
//
// This function never returns: the process exits with EXIT_SUCCESS as
// soon as an ARP announcement is sent, or with EXIT_FAILURE once more
// than 60 seconds (by CLOCK_MONOTONIC) have elapsed without success.
// A clock_gettime() failure is fatal and is reported via suicide().
__attribute__((noreturn))
static void quit_after_lease_handler(struct client_state_t cs[static 1])
{
    struct timespec res;
    if (clock_gettime(CLOCK_MONOTONIC, &res) < 0) {
        // Three arguments follow, so the format needs three %s conversions.
        suicide("%s: (%s) clock_gettime failed: %s",
                client_config.interface, __func__, strerror(errno));
    }
    const time_t init_ts = res.tv_sec;
    // NOTE: no delay is inserted here between attempts; each iteration
    // immediately retries the announcement after logging the failure.
    for (;;) {
        if (arp_announcement(cs) >= 0)
            exit(EXIT_SUCCESS);
        log_warning("%s: (%s) Failed to send ARP announcement: %s",
                    client_config.interface, __func__, strerror(errno));
        if (clock_gettime(CLOCK_MONOTONIC, &res) < 0) {
            suicide("%s: (%s) clock_gettime failed: %s",
                    client_config.interface, __func__, strerror(errno));
        }
        // Give up once more than a minute has passed since we started.
        if (res.tv_sec - init_ts > 60)
            break;
    }
    exit(EXIT_FAILURE);
}
int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts)
{
if (nowts >= garp.arp_check_start_ts + ANNOUNCE_WAIT ||
@ -503,22 +530,10 @@ int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts)
client_config.interface);
}
cs->routerAddr = get_option_router(&garp.dhcp_packet);
if (arp_get_gw_hwaddr(cs) < 0) {
log_warning("%s: (%s) Failed to send request to get gateway and agent hardware addresses: %s",
client_config.interface, __func__, strerror(errno));
return ARPR_FAIL;
}
stop_dhcp_listen(cs);
write_leasefile(temp_addr);
if (arp_announcement(cs) < 0) {
log_warning("%s: (%s) Failed to send first ARP announcement: %s",
client_config.interface, __func__, strerror(errno));
// If we return ARPR_FAIL here, the state machine will get messed up since we
// do have a binding, we've just not announced it yet. Ideally, we will note
// this issue and will try to announce again.
}
if (client_config.quit_after_lease)
exit(EXIT_SUCCESS);
quit_after_lease_handler(cs);
return ARPR_FREE;
}
long long rtts = garp.send_stats[ASEND_COLLISION_CHECK].ts +
@ -538,6 +553,26 @@ int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts)
return ARPR_OK;
}
// Asks arp_get_gw_hwaddr() to send the request for the gateway and agent
// hardware addresses.  Returns ARPR_OK when that call succeeds; otherwise
// logs a warning (including strerror(errno)) and returns ARPR_FAIL.
int arp_query_gateway(struct client_state_t cs[static 1])
{
    const int sent = arp_get_gw_hwaddr(cs);
    if (sent >= 0)
        return ARPR_OK;

    log_warning("%s: (%s) Failed to send request to get gateway and agent hardware addresses: %s",
                client_config.interface, __func__, strerror(errno));
    return ARPR_FAIL;
}
// Sends an ARP announcement via arp_announcement().  Returns ARPR_OK when
// that call succeeds; otherwise logs a warning (including strerror(errno))
// and returns ARPR_FAIL.
int arp_announce(struct client_state_t cs[static 1])
{
    const int sent = arp_announcement(cs);
    if (sent >= 0)
        return ARPR_OK;

    log_warning("%s: (%s) Failed to send ARP announcement: %s",
                client_config.interface, __func__, strerror(errno));
    return ARPR_FAIL;
}
int arp_do_defense(struct client_state_t cs[static 1])
{
// Even though the BPF will usually catch this case, sometimes there are

View File

@ -117,6 +117,10 @@ int arp_gw_failed(struct client_state_t cs[static 1]);
int arp_do_collision_check(struct client_state_t cs[static 1]);
int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts);
// Requests the gateway and agent hardware addresses.  Returns ARPR_OK on
// success, or ARPR_FAIL (after logging a warning) if the request fails.
int arp_query_gateway(struct client_state_t cs[static 1]);
// Sends an ARP announcement.  Returns ARPR_OK on success, or ARPR_FAIL
// (after logging a warning) if the announcement could not be sent.
int arp_announce(struct client_state_t cs[static 1]);
int arp_do_defense(struct client_state_t cs[static 1]);
int arp_defense_timeout(struct client_state_t cs[static 1], long long nowts);
int arp_do_gw_query(struct client_state_t cs[static 1]);

View File

@ -537,6 +537,8 @@ skip_to_requesting:
if (arp_timeout) {
int r = arp_collision_timeout(cs, nowts);
if (r == ARPR_FREE) {
arp_query_gateway(cs); // XXX: Handle failure
arp_announce(cs); // XXX: Handle failure
break;
} else if (r == ARPR_OK) {
} else if (r == ARPR_FAIL) {