Add support for IP_HDRINCL IP option for raw sockets.

Updates #2746
Fixes #3158

PiperOrigin-RevId: 320497190
This commit is contained in:
Bhasker Hariharan 2020-07-09 16:24:43 -07:00 committed by gVisor bot
parent e506fcd931
commit 5946f11182
4 changed files with 110 additions and 10 deletions

View File

@ -2112,13 +2112,22 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
} }
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0)) return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0))
case linux.IP_HDRINCL:
if len(optVal) == 0 {
return nil
}
v, err := parseIntOrChar(optVal)
if err != nil {
return err
}
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0))
case linux.IP_ADD_SOURCE_MEMBERSHIP, case linux.IP_ADD_SOURCE_MEMBERSHIP,
linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BIND_ADDRESS_NO_PORT,
linux.IP_BLOCK_SOURCE, linux.IP_BLOCK_SOURCE,
linux.IP_CHECKSUM, linux.IP_CHECKSUM,
linux.IP_DROP_SOURCE_MEMBERSHIP, linux.IP_DROP_SOURCE_MEMBERSHIP,
linux.IP_FREEBIND, linux.IP_FREEBIND,
linux.IP_HDRINCL,
linux.IP_IPSEC_POLICY, linux.IP_IPSEC_POLICY,
linux.IP_MINTTL, linux.IP_MINTTL,
linux.IP_MSFILTER, linux.IP_MSFILTER,

View File

@ -648,6 +648,11 @@ const (
// whether an IPv6 socket is to be restricted to sending and receiving // whether an IPv6 socket is to be restricted to sending and receiving
// IPv6 packets only. // IPv6 packets only.
V6OnlyOption V6OnlyOption
// IPHdrIncludedOption is used by SetSockOptBool/GetSockOptBool to indicate
// for a raw endpoint that all packets being written already carry an IP
// header, so the endpoint should not attach one.
IPHdrIncludedOption
) )
// SockOptInt represents socket options which values have the int type. // SockOptInt represents socket options which values have the int type.

View File

@ -63,6 +63,7 @@ type endpoint struct {
stack *stack.Stack `state:"manual"` stack *stack.Stack `state:"manual"`
waiterQueue *waiter.Queue waiterQueue *waiter.Queue
associated bool associated bool
hdrIncluded bool
// The following fields are used to manage the receive queue and are // The following fields are used to manage the receive queue and are
// protected by rcvMu. // protected by rcvMu.
@ -108,6 +109,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
rcvBufSizeMax: 32 * 1024, rcvBufSizeMax: 32 * 1024,
sndBufSizeMax: 32 * 1024, sndBufSizeMax: 32 * 1024,
associated: associated, associated: associated,
hdrIncluded: !associated,
} }
// Override with stack defaults. // Override with stack defaults.
@ -182,10 +184,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
// Read implements tcpip.Endpoint.Read. // Read implements tcpip.Endpoint.Read.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
if !e.associated {
return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue
}
e.rcvMu.Lock() e.rcvMu.Lock()
// If there's no data to read, return that read would block or that the // If there's no data to read, return that read would block or that the
@ -263,7 +261,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
// If this is an unassociated socket and callee provided a nonzero // If this is an unassociated socket and callee provided a nonzero
// destination address, route using that address. // destination address, route using that address.
if !e.associated { if e.hdrIncluded {
ip := header.IPv4(payloadBytes) ip := header.IPv4(payloadBytes)
if !ip.IsValid(len(payloadBytes)) { if !ip.IsValid(len(payloadBytes)) {
e.mu.RUnlock() e.mu.RUnlock()
@ -353,7 +351,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
} }
} }
if !e.associated { if e.hdrIncluded {
if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{ if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
Data: buffer.View(payloadBytes).ToVectorisedView(), Data: buffer.View(payloadBytes).ToVectorisedView(),
}); err != nil { }); err != nil {
@ -513,6 +511,13 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
switch opt {
case tcpip.IPHdrIncludedOption:
e.mu.Lock()
e.hdrIncluded = v
e.mu.Unlock()
return nil
}
return tcpip.ErrUnknownProtocolOption return tcpip.ErrUnknownProtocolOption
} }
@ -577,6 +582,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
case tcpip.KeepaliveEnabledOption: case tcpip.KeepaliveEnabledOption:
return false, nil return false, nil
case tcpip.IPHdrIncludedOption:
e.mu.Lock()
v := e.hdrIncluded
e.mu.Unlock()
return v, nil
default: default:
return false, tcpip.ErrUnknownProtocolOption return false, tcpip.ErrUnknownProtocolOption
} }
@ -616,8 +627,15 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) { func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
e.rcvMu.Lock() e.rcvMu.Lock()
// Drop the packet if our buffer is currently full. // Drop the packet if our buffer is currently full or if this is an unassociated
if e.rcvClosed { // endpoint (i.e endpoint created w/ IPPROTO_RAW). Such endpoints are send only
// See: https://man7.org/linux/man-pages/man7/raw.7.html
//
// An IPPROTO_RAW socket is send only. If you really want to receive
// all IP packets, use a packet(7) socket with the ETH_P_IP protocol.
// Note that packet sockets don't reassemble IP fragments, unlike raw
// sockets.
if e.rcvClosed || !e.associated {
e.rcvMu.Unlock() e.rcvMu.Unlock()
e.stack.Stats().DroppedPackets.Increment() e.stack.Stats().DroppedPackets.Increment()
e.stats.ReceiveErrors.ClosedReceiver.Increment() e.stats.ReceiveErrors.ClosedReceiver.Increment()

View File

@ -167,7 +167,7 @@ TEST_F(RawHDRINCL, NotReadable) {
// nothing to be read. // nothing to be read.
char buf[117]; char buf[117];
ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT), ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT),
SyscallFailsWithErrno(EINVAL)); SyscallFailsWithErrno(EAGAIN));
} }
// Test that we can connect() to a valid IP (loopback). // Test that we can connect() to a valid IP (loopback).
@ -332,6 +332,74 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK); EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);
} }
// Send and receive a packet w/ the IP_HDRINCL option set.
//
// Two IPPROTO_UDP raw sockets are created: one sends a hand-built IP + UDP
// packet with IP_HDRINCL enabled, the other receives it. The fixture's
// IPPROTO_RAW socket (socket_) must not observe the packet.
TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) {
  int port = 40000;
  if (!IsRunningOnGvisor()) {
    // Ports are unsigned 16-bit values; a signed short would turn any port
    // above 32767 into a negative number.
    port = static_cast<uint16_t>(ASSERT_NO_ERRNO_AND_VALUE(
        PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
  }

  FileDescriptor recv_sock =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));

  FileDescriptor send_sock =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));

  // Enable IP_HDRINCL option so that we can build and send w/ an IP
  // header.
  constexpr int kSockOptOn = 1;
  ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
                         sizeof(kSockOptOn)),
              SyscallSucceeds());

  // This is not strictly required but we do it to make sure that setting
  // IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving
  // packets.
  ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
                         sizeof(kSockOptOn)),
              SyscallSucceeds());

  // Construct a packet with an IP header, UDP header, and payload.
  constexpr char kPayload[] = "toto";
  char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
  ASSERT_TRUE(
      FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));

  // Check the result of sendto() explicitly: if the send fails, the blocking
  // recvfrom() below would otherwise hang instead of reporting the failure.
  socklen_t addrlen = sizeof(addr_);
  ASSERT_THAT(sendto(send_sock.get(), packet, sizeof(packet), 0,
                     reinterpret_cast<struct sockaddr*>(&addr_), addrlen),
              SyscallSucceedsWithValue(sizeof(packet)));

  // Receive the payload.
  char recv_buf[sizeof(packet)];
  struct sockaddr_in src;
  socklen_t src_size = sizeof(src);
  ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0,
                       reinterpret_cast<struct sockaddr*>(&src), &src_size),
              SyscallSucceedsWithValue(sizeof(packet)));
  EXPECT_EQ(
      memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
             sizeof(kPayload)),
      0);

  // The network stack should have set the source address.
  EXPECT_EQ(src.sin_family, AF_INET);
  EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);

  // The received IP header is expected to carry a non-zero identification
  // field.
  struct iphdr iphdr = {};
  memcpy(&iphdr, recv_buf, sizeof(iphdr));
  EXPECT_NE(iphdr.id, 0);

  // Also verify that the packet we just sent was not delivered to the
  // IPPROTO_RAW socket.
  {
    char raw_buf[sizeof(packet)];
    struct sockaddr_in raw_src;
    socklen_t raw_src_size = sizeof(raw_src);
    ASSERT_THAT(
        recvfrom(socket_, raw_buf, sizeof(raw_buf), MSG_DONTWAIT,
                 reinterpret_cast<struct sockaddr*>(&raw_src), &raw_src_size),
        SyscallFailsWithErrno(EAGAIN));
  }
}
} // namespace } // namespace
} // namespace testing } // namespace testing