arp: Handle initial gateway query asynchronously and retry failures.

The gateway/router MAC fingerprinting could perhaps be done more
robustly in the face of suspend or carrier loss, but the time window
in which things could get confused is very small and I would rather
just rely on supervisor respawn in that case.

I don't think I've ever actually seen even this case occur.
This commit is contained in:
Nicholas J. Kain 2017-02-24 07:39:14 -05:00
parent 7080850f38
commit 7f08d4b6fb
3 changed files with 23 additions and 4 deletions

View File

@ -63,7 +63,7 @@ int arp_probe_max = 2000; // maximum delay until repeated probe (ms)
#define DEFEND_INTERVAL 10000 // minimum interval between defensive ARPs
static struct arp_data garp = {
.wake_ts = { -1, -1, -1, -1, -1, -1 },
.wake_ts = { -1, -1, -1, -1, -1, -1, -1 },
.send_stats = {{0,0},{0,0},{0,0}},
.last_conflict_ts = 0,
.gw_check_initpings = 0,
@ -555,14 +555,30 @@ int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts)
/*
 * Issue the initial gateway hardware-address query, at most once.
 *
 * If the query was already sent (cs->sent_gw_query), nothing is
 * transmitted.  Otherwise arp_get_gw_hwaddr() is invoked; on success the
 * sent flag is latched.  In both of those cases the AS_QUERY_GW_SEND wake
 * timestamp is reset to -1 (which arp_query_gateway_timeout() treats as
 * "nothing pending") and ARPR_OK is returned.
 *
 * If arp_get_gw_hwaddr() reports failure (< 0), a warning is logged, the
 * AS_QUERY_GW_SEND wake timestamp is set to curms() + ARP_RETRANS_DELAY so
 * the send is attempted again later, and ARPR_FAIL is returned.
 */
int arp_query_gateway(struct client_state_t cs[static 1])
{
    if (!cs->sent_gw_query) {
        if (arp_get_gw_hwaddr(cs) < 0) {
            log_warning("%s: (%s) Failed to send request to get gateway and agent hardware addresses: %s",
                        client_config.interface, __func__, strerror(errno));
            // Arm the retry timer; the flag stays false so the send is retried.
            garp.wake_ts[AS_QUERY_GW_SEND] = curms() + ARP_RETRANS_DELAY;
            return ARPR_FAIL;
        }
        cs->sent_gw_query = true;
    }

    // Query is out (now or previously): no retry needs to be scheduled.
    garp.wake_ts[AS_QUERY_GW_SEND] = -1;
    return ARPR_OK;
}
/*
 * Timer handler for retrying the initial gateway query.
 *
 * Returns:
 *    1  the AS_QUERY_GW_SEND deadline has not been reached yet (nowts is
 *       earlier than the stored wake timestamp);
 *    0  either no retry is pending (wake timestamp is -1), or the deadline
 *       passed and arp_query_gateway() returned ARPR_OK;
 *   -1  the deadline passed and arp_query_gateway() did not return ARPR_OK.
 */
int arp_query_gateway_timeout(struct client_state_t cs[static 1], long long nowts)
{
    const long long deadline = garp.wake_ts[AS_QUERY_GW_SEND];

    if (deadline == -1)
        return 0;
    if (nowts < deadline)
        return 1;

    if (arp_query_gateway(cs) != ARPR_OK)
        return -1;
    return 0;
}
int arp_announce(struct client_state_t cs[static 1])
{
if (cs->sent_first_announce && cs->sent_second_announce) {

View File

@ -63,6 +63,7 @@ typedef enum {
AS_ANNOUNCE, // Announcing our MAC/IP mapping to ethernet peers.
AS_COLLISION_CHECK, // Checking to see if another host has our IP before
// accepting a new lease.
AS_QUERY_GW_SEND, // Sending arp_ping to query the gateway MAC.
AS_GW_CHECK, // Seeing if the default GW still exists on the local
// segment after the hardware link was lost.
AS_GW_QUERY, // Finding the default GW MAC address.
@ -120,6 +121,7 @@ int arp_do_collision_check(struct client_state_t cs[static 1]);
int arp_collision_timeout(struct client_state_t cs[static 1], long long nowts);
int arp_query_gateway(struct client_state_t cs[static 1]);
int arp_query_gateway_timeout(struct client_state_t cs[static 1], long long nowts);
int arp_announce(struct client_state_t cs[static 1]);
int arp_announce_timeout(struct client_state_t cs[static 1], long long nowts);

View File

@ -537,8 +537,7 @@ skip_to_requesting:
if (arp_timeout) {
int r = arp_collision_timeout(cs, nowts);
if (r == ARPR_FREE) {
if (arp_query_gateway(cs) == ARPR_OK)
cs->sent_gw_query = true; // XXX: Handle the false case
arp_query_gateway(cs);
arp_announce(cs);
break;
} else if (r == ARPR_OK) {
@ -647,7 +646,9 @@ skip_to_requesting:
arp_defense_timeout(cs, nowts);
else
arp_announce_timeout(cs, nowts);
if (!cs->got_router_arp || !cs->got_server_arp) {
if (!cs->sent_gw_query)
arp_query_gateway_timeout(cs, nowts);
else if (!cs->got_router_arp || !cs->got_server_arp) {
int r = arp_gw_query_timeout(cs, nowts);
if (r == ARPR_OK) {
} else if (r == ARPR_FAIL) {