From bcb97a3bb7560fab49ac35d8c184a510cb81f801 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 10 Dec 2020 20:06:13 -0800 Subject: [PATCH] Disable host reassembly for fragments. fdbased endpoint was enabling fragment reassembly on the host AF_PACKET socket to ensure that fragments are delivered inorder to the right dispatcher. But this prevents fragments from being delivered to gvisor at all and makes testing of gvisor's fragment reassembly code impossible. The potential impact from this is minimal since IP Fragmentation is not really that prevelant and in cases where we do get fragments we may deliver the fragment out of order to the TCP layer as multiple network dispatchers may process the fragments and deliver a reassembled fragment after the next packet has been delivered to the TCP endpoint. While not desirable I believe the impact from this is minimal due to low prevalence of fragmentation. Also removed PktType and Hatype fields when binding the socket as these are not used when binding. Its just confusing to have them specified. See: https://man7.org/linux/man-pages/man7/packet.7.html "Fields used for binding are sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex." Fixes #5055 PiperOrigin-RevId: 346919439 --- pkg/tcpip/link/fdbased/endpoint.go | 9 ++++++--- runsc/sandbox/network.go | 2 -- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 9f2084eae..cb94cbea6 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -284,9 +284,12 @@ func createInboundDispatcher(e *endpoint, fd int, isSocket bool) (linkDispatcher } switch sa.(type) { case *unix.SockaddrLinklayer: - // enable PACKET_FANOUT mode is the underlying socket is - // of type AF_PACKET. - const fanoutType = 0x8000 // PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG + // Enable PACKET_FANOUT mode if the underlying socket is of type + // AF_PACKET. We do not enable PACKET_FANOUT_FLAG_DEFRAG as that will + // prevent gvisor from receiving fragmented packets and the host does the + // reassembly on our behalf before delivering the fragments. This makes it + // hard to test fragmentation reassembly code in Netstack. + const fanoutType = unix.PACKET_FANOUT_HASH fanoutArg := fanoutID | fanoutType<<16 if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_FANOUT, fanoutArg); err != nil { return nil, fmt.Errorf("failed to enable PACKET_FANOUT option: %v", err) diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index d8112e7a2..9e429f7d5 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -279,8 +279,6 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( ll := syscall.SockaddrLinklayer{ Protocol: protocol, Ifindex: iface.Index, - Hatype: 0, // No ARP type. - Pkttype: syscall.PACKET_OTHERHOST, } if err := syscall.Bind(fd, &ll); err != nil { return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err)