Track number of packets queued to Failed neighbors
Add a NIC-specific neighbor table statistic so we can determine how many packets have been queued to Failed neighbors, which indicates an unhealthy local network. This change helps us debug in-field issues where subsequent traffic to a neighbor fails. Fixes #4819. PiperOrigin-RevId: 344131119
This commit is contained in:
parent
d492b21319
commit
f90ab60a8a
|
@ -24,9 +24,16 @@ import (
|
|||
|
||||
const neighborCacheSize = 512 // max entries per interface
|
||||
|
||||
// NeighborStats holds metrics for the neighbor table. It is exposed per NIC
// through the Neighbor field of NICStats.
|
||||
type NeighborStats struct {
|
||||
// FailedEntryLookups counts the number of lookups performed on an entry in
|
||||
// Failed state. It is incremented each time a packet is queued to an entry
// that is already in the Failed state.
|
||||
FailedEntryLookups *tcpip.StatCounter
|
||||
}
|
||||
|
||||
// neighborCache maps IP addresses to link addresses. It uses the Least
|
||||
// Recently Used (LRU) eviction strategy to implement a bounded cache for
|
||||
// dynmically acquired entries. It contains the state machine and configuration
|
||||
// dynamically acquired entries. It contains the state machine and configuration
|
||||
// for running Neighbor Unreachability Detection (NUD).
|
||||
//
|
||||
// There are two types of entries in the neighbor cache:
|
||||
|
|
|
@ -86,7 +86,8 @@ func newTestNeighborCache(nudDisp NUDDispatcher, config NUDConfigurations, clock
|
|||
clock: clock,
|
||||
nudDisp: nudDisp,
|
||||
},
|
||||
id: 1,
|
||||
id: 1,
|
||||
stats: makeNICStats(),
|
||||
},
|
||||
state: NewNUDState(config, rng),
|
||||
cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize),
|
||||
|
|
|
@ -347,9 +347,10 @@ func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) {
|
|||
e.setStateLocked(Delay)
|
||||
e.dispatchChangeEventLocked()
|
||||
|
||||
case Incomplete, Reachable, Delay, Probe, Static, Failed:
|
||||
case Incomplete, Reachable, Delay, Probe, Static:
|
||||
// Do nothing
|
||||
|
||||
case Failed:
|
||||
e.nic.stats.Neighbor.FailedEntryLookups.Increment()
|
||||
default:
|
||||
panic(fmt.Sprintf("Invalid cache entry state: %s", e.neigh.State))
|
||||
}
|
||||
|
|
|
@ -89,7 +89,7 @@ func eventDiffOptsWithSort() []cmp.Option {
|
|||
// | Stale | Reachable | Solicited confirmation w/o address | Notify wakers | Changed |
|
||||
// | Stale | Stale | Override confirmation | Update LinkAddr | Changed |
|
||||
// | Stale | Stale | Probe w/ different address | Update LinkAddr | Changed |
|
||||
// | Stale | Delay | Packet sent | | Changed |
|
||||
// | Stale | Delay | Packet queued | | Changed |
|
||||
// | Delay | Reachable | Upper-layer confirmation | | Changed |
|
||||
// | Delay | Reachable | Solicited override confirmation | Update LinkAddr | Changed |
|
||||
// | Delay | Reachable | Solicited confirmation w/o address | Notify wakers | Changed |
|
||||
|
@ -101,6 +101,7 @@ func eventDiffOptsWithSort() []cmp.Option {
|
|||
// | Probe | Stale | Probe or confirmation w/ different address | | Changed |
|
||||
// | Probe | Probe | Retransmit timer expired | Send probe | Changed |
|
||||
// | Probe | Failed | Max probes sent without reply | Notify wakers | Removed |
|
||||
// | Failed | Failed | Packet queued | | |
|
||||
// | Failed | | Unreachability timer expired | Delete entry | |
|
||||
|
||||
type testEntryEventType uint8
|
||||
|
@ -228,6 +229,7 @@ func entryTestSetup(c NUDConfigurations) (*neighborEntry, *testNUDDispatcher, *e
|
|||
clock: clock,
|
||||
nudDisp: &disp,
|
||||
},
|
||||
stats: makeNICStats(),
|
||||
}
|
||||
nic.networkEndpoints = map[tcpip.NetworkProtocolNumber]NetworkEndpoint{
|
||||
header.IPv6ProtocolNumber: (&testIPv6Protocol{}).NewEndpoint(&nic, nil, nil, nil),
|
||||
|
@ -3433,6 +3435,146 @@ func TestEntryProbeToFailed(t *testing.T) {
|
|||
nudDisp.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestEntryFailedToFailed(t *testing.T) {
|
||||
c := DefaultNUDConfigurations()
|
||||
c.MaxMulticastProbes = 3
|
||||
c.MaxUnicastProbes = 3
|
||||
e, nudDisp, linkRes, clock := entryTestSetup(c)
|
||||
|
||||
// Verify the cache contains the entry.
|
||||
if _, ok := e.nic.neigh.cache[entryTestAddr1]; !ok {
|
||||
t.Errorf("expected entry %q to exist in the neighbor cache", entryTestAddr1)
|
||||
}
|
||||
|
||||
// TODO(gvisor.dev/issue/4872): Use helper functions to start entry tests in
|
||||
// their expected state.
|
||||
e.mu.Lock()
|
||||
e.handlePacketQueuedLocked(entryTestAddr2)
|
||||
e.mu.Unlock()
|
||||
|
||||
runImmediatelyScheduledJobs(clock)
|
||||
{
|
||||
wantProbes := []entryTestProbeInfo{
|
||||
{
|
||||
RemoteAddress: entryTestAddr1,
|
||||
LocalAddress: entryTestAddr2,
|
||||
},
|
||||
}
|
||||
linkRes.mu.Lock()
|
||||
diff := cmp.Diff(linkRes.probes, wantProbes)
|
||||
linkRes.probes = nil
|
||||
linkRes.mu.Unlock()
|
||||
if diff != "" {
|
||||
t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
|
||||
Solicited: false,
|
||||
Override: false,
|
||||
IsRouter: false,
|
||||
})
|
||||
e.handlePacketQueuedLocked(entryTestAddr2)
|
||||
e.mu.Unlock()
|
||||
|
||||
waitFor := c.DelayFirstProbeTime + c.RetransmitTimer*time.Duration(c.MaxUnicastProbes)
|
||||
clock.Advance(waitFor)
|
||||
{
|
||||
wantProbes := []entryTestProbeInfo{
|
||||
{
|
||||
RemoteAddress: entryTestAddr1,
|
||||
RemoteLinkAddress: entryTestLinkAddr1,
|
||||
},
|
||||
{
|
||||
RemoteAddress: entryTestAddr1,
|
||||
RemoteLinkAddress: entryTestLinkAddr1,
|
||||
},
|
||||
{
|
||||
RemoteAddress: entryTestAddr1,
|
||||
RemoteLinkAddress: entryTestLinkAddr1,
|
||||
},
|
||||
}
|
||||
linkRes.mu.Lock()
|
||||
diff := cmp.Diff(linkRes.probes, wantProbes)
|
||||
linkRes.mu.Unlock()
|
||||
if diff != "" {
|
||||
t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
wantEvents := []testEntryEventInfo{
|
||||
{
|
||||
EventType: entryTestAdded,
|
||||
NICID: entryTestNICID,
|
||||
Entry: NeighborEntry{
|
||||
Addr: entryTestAddr1,
|
||||
LinkAddr: tcpip.LinkAddress(""),
|
||||
State: Incomplete,
|
||||
},
|
||||
},
|
||||
{
|
||||
EventType: entryTestChanged,
|
||||
NICID: entryTestNICID,
|
||||
Entry: NeighborEntry{
|
||||
Addr: entryTestAddr1,
|
||||
LinkAddr: entryTestLinkAddr1,
|
||||
State: Stale,
|
||||
},
|
||||
},
|
||||
{
|
||||
EventType: entryTestChanged,
|
||||
NICID: entryTestNICID,
|
||||
Entry: NeighborEntry{
|
||||
Addr: entryTestAddr1,
|
||||
LinkAddr: entryTestLinkAddr1,
|
||||
State: Delay,
|
||||
},
|
||||
},
|
||||
{
|
||||
EventType: entryTestChanged,
|
||||
NICID: entryTestNICID,
|
||||
Entry: NeighborEntry{
|
||||
Addr: entryTestAddr1,
|
||||
LinkAddr: entryTestLinkAddr1,
|
||||
State: Probe,
|
||||
},
|
||||
},
|
||||
{
|
||||
EventType: entryTestRemoved,
|
||||
NICID: entryTestNICID,
|
||||
Entry: NeighborEntry{
|
||||
Addr: entryTestAddr1,
|
||||
LinkAddr: entryTestLinkAddr1,
|
||||
State: Probe,
|
||||
},
|
||||
},
|
||||
}
|
||||
nudDisp.mu.Lock()
|
||||
if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" {
|
||||
t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
|
||||
}
|
||||
nudDisp.mu.Unlock()
|
||||
|
||||
failedLookups := e.nic.stats.Neighbor.FailedEntryLookups
|
||||
if got := failedLookups.Value(); got != 0 {
|
||||
t.Errorf("got Neighbor.FailedEntryLookups = %d, want = 0", got)
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
// Verify queuing a packet to the entry immediately fails.
|
||||
e.handlePacketQueuedLocked(entryTestAddr2)
|
||||
state := e.neigh.State
|
||||
e.mu.Unlock()
|
||||
if state != Failed {
|
||||
t.Errorf("got e.neigh.State = %q, want = %q", state, Failed)
|
||||
}
|
||||
|
||||
if got := failedLookups.Value(); got != 1 {
|
||||
t.Errorf("got Neighbor.FailedEntryLookups = %d, want = 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEntryFailedGetsDeleted(t *testing.T) {
|
||||
c := DefaultNUDConfigurations()
|
||||
c.MaxMulticastProbes = 3
|
||||
|
|
|
@ -60,12 +60,14 @@ type NIC struct {
|
|||
}
|
||||
}
|
||||
|
||||
// NICStats includes transmitted and received stats.
|
||||
// NICStats holds statistics for a NIC: transmit and receive direction stats
// plus metrics for the NIC's neighbor table.
|
||||
type NICStats struct {
|
||||
// Tx holds the stats for the transmit direction.
Tx DirectionStats
|
||||
// Rx holds the stats for the receive direction.
Rx DirectionStats
|
||||
|
||||
// NOTE(review): presumably counts traffic received while the NIC is
// disabled — confirm against where DisabledRx is updated.
DisabledRx DirectionStats
|
||||
|
||||
// Neighbor holds the metrics for this NIC's neighbor table.
Neighbor NeighborStats
|
||||
}
|
||||
|
||||
func makeNICStats() NICStats {
|
||||
|
|
Loading…
Reference in New Issue