Add support for IP_HDRINCL IP option for raw sockets.
Updates #2746 Fixes #3158 PiperOrigin-RevId: 320497190
This commit is contained in:
parent
e506fcd931
commit
5946f11182
|
@ -2112,13 +2112,22 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
|
||||||
}
|
}
|
||||||
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0))
|
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0))
|
||||||
|
|
||||||
|
case linux.IP_HDRINCL:
|
||||||
|
if len(optVal) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
v, err := parseIntOrChar(optVal)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0))
|
||||||
|
|
||||||
case linux.IP_ADD_SOURCE_MEMBERSHIP,
|
case linux.IP_ADD_SOURCE_MEMBERSHIP,
|
||||||
linux.IP_BIND_ADDRESS_NO_PORT,
|
linux.IP_BIND_ADDRESS_NO_PORT,
|
||||||
linux.IP_BLOCK_SOURCE,
|
linux.IP_BLOCK_SOURCE,
|
||||||
linux.IP_CHECKSUM,
|
linux.IP_CHECKSUM,
|
||||||
linux.IP_DROP_SOURCE_MEMBERSHIP,
|
linux.IP_DROP_SOURCE_MEMBERSHIP,
|
||||||
linux.IP_FREEBIND,
|
linux.IP_FREEBIND,
|
||||||
linux.IP_HDRINCL,
|
|
||||||
linux.IP_IPSEC_POLICY,
|
linux.IP_IPSEC_POLICY,
|
||||||
linux.IP_MINTTL,
|
linux.IP_MINTTL,
|
||||||
linux.IP_MSFILTER,
|
linux.IP_MSFILTER,
|
||||||
|
|
|
@ -648,6 +648,11 @@ const (
|
||||||
// whether an IPv6 socket is to be restricted to sending and receiving
|
// whether an IPv6 socket is to be restricted to sending and receiving
|
||||||
// IPv6 packets only.
|
// IPv6 packets only.
|
||||||
V6OnlyOption
|
V6OnlyOption
|
||||||
|
|
||||||
|
// IPHdrIncludedOption is used by SetSockOptBool to indicate for a raw
|
||||||
|
// endpoint that all packets being written have an IP header and the
|
||||||
|
// endpoint should not attach an IP header.
|
||||||
|
IPHdrIncludedOption
|
||||||
)
|
)
|
||||||
|
|
||||||
// SockOptInt represents socket options which values have the int type.
|
// SockOptInt represents socket options which values have the int type.
|
||||||
|
|
|
@ -63,6 +63,7 @@ type endpoint struct {
|
||||||
stack *stack.Stack `state:"manual"`
|
stack *stack.Stack `state:"manual"`
|
||||||
waiterQueue *waiter.Queue
|
waiterQueue *waiter.Queue
|
||||||
associated bool
|
associated bool
|
||||||
|
hdrIncluded bool
|
||||||
|
|
||||||
// The following fields are used to manage the receive queue and are
|
// The following fields are used to manage the receive queue and are
|
||||||
// protected by rcvMu.
|
// protected by rcvMu.
|
||||||
|
@ -108,6 +109,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
|
||||||
rcvBufSizeMax: 32 * 1024,
|
rcvBufSizeMax: 32 * 1024,
|
||||||
sndBufSizeMax: 32 * 1024,
|
sndBufSizeMax: 32 * 1024,
|
||||||
associated: associated,
|
associated: associated,
|
||||||
|
hdrIncluded: !associated,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Override with stack defaults.
|
// Override with stack defaults.
|
||||||
|
@ -182,10 +184,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
|
||||||
|
|
||||||
// Read implements tcpip.Endpoint.Read.
|
// Read implements tcpip.Endpoint.Read.
|
||||||
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
|
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
|
||||||
if !e.associated {
|
|
||||||
return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue
|
|
||||||
}
|
|
||||||
|
|
||||||
e.rcvMu.Lock()
|
e.rcvMu.Lock()
|
||||||
|
|
||||||
// If there's no data to read, return that read would block or that the
|
// If there's no data to read, return that read would block or that the
|
||||||
|
@ -263,7 +261,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
|
||||||
|
|
||||||
// If this is an unassociated socket and callee provided a nonzero
|
// If this socket has IP_HDRINCL set and the caller provided a nonzero
|
||||||
// destination address, route using that address.
|
// destination address, route using that address.
|
||||||
if !e.associated {
|
if e.hdrIncluded {
|
||||||
ip := header.IPv4(payloadBytes)
|
ip := header.IPv4(payloadBytes)
|
||||||
if !ip.IsValid(len(payloadBytes)) {
|
if !ip.IsValid(len(payloadBytes)) {
|
||||||
e.mu.RUnlock()
|
e.mu.RUnlock()
|
||||||
|
@ -353,7 +351,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !e.associated {
|
if e.hdrIncluded {
|
||||||
if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
|
if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
|
||||||
Data: buffer.View(payloadBytes).ToVectorisedView(),
|
Data: buffer.View(payloadBytes).ToVectorisedView(),
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
|
@ -513,6 +511,13 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
|
||||||
|
|
||||||
// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
|
// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
|
||||||
func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
|
func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
|
||||||
|
switch opt {
|
||||||
|
case tcpip.IPHdrIncludedOption:
|
||||||
|
e.mu.Lock()
|
||||||
|
e.hdrIncluded = v
|
||||||
|
e.mu.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return tcpip.ErrUnknownProtocolOption
|
return tcpip.ErrUnknownProtocolOption
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -577,6 +582,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
|
||||||
case tcpip.KeepaliveEnabledOption:
|
case tcpip.KeepaliveEnabledOption:
|
||||||
return false, nil
|
return false, nil
|
||||||
|
|
||||||
|
case tcpip.IPHdrIncludedOption:
|
||||||
|
e.mu.Lock()
|
||||||
|
v := e.hdrIncluded
|
||||||
|
e.mu.Unlock()
|
||||||
|
return v, nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false, tcpip.ErrUnknownProtocolOption
|
return false, tcpip.ErrUnknownProtocolOption
|
||||||
}
|
}
|
||||||
|
@ -616,8 +627,15 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
|
||||||
func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
|
func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
|
||||||
e.rcvMu.Lock()
|
e.rcvMu.Lock()
|
||||||
|
|
||||||
// Drop the packet if our buffer is currently full.
|
// Drop the packet if the receiver is closed or if this is an unassociated
|
||||||
if e.rcvClosed {
|
// endpoint (i.e., an endpoint created w/ IPPROTO_RAW). Such endpoints are send-only.
|
||||||
|
// See: https://man7.org/linux/man-pages/man7/raw.7.html
|
||||||
|
//
|
||||||
|
// An IPPROTO_RAW socket is send only. If you really want to receive
|
||||||
|
// all IP packets, use a packet(7) socket with the ETH_P_IP protocol.
|
||||||
|
// Note that packet sockets don't reassemble IP fragments, unlike raw
|
||||||
|
// sockets.
|
||||||
|
if e.rcvClosed || !e.associated {
|
||||||
e.rcvMu.Unlock()
|
e.rcvMu.Unlock()
|
||||||
e.stack.Stats().DroppedPackets.Increment()
|
e.stack.Stats().DroppedPackets.Increment()
|
||||||
e.stats.ReceiveErrors.ClosedReceiver.Increment()
|
e.stats.ReceiveErrors.ClosedReceiver.Increment()
|
||||||
|
|
|
@ -167,7 +167,7 @@ TEST_F(RawHDRINCL, NotReadable) {
|
||||||
// nothing to be read.
|
// nothing to be read.
|
||||||
char buf[117];
|
char buf[117];
|
||||||
ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT),
|
ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT),
|
||||||
SyscallFailsWithErrno(EINVAL));
|
SyscallFailsWithErrno(EAGAIN));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test that we can connect() to a valid IP (loopback).
|
// Test that we can connect() to a valid IP (loopback).
|
||||||
|
@ -332,6 +332,74 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
|
||||||
EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);
|
EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Send and receive a packet w/ the IP_HDRINCL option set.
|
||||||
|
TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) {
|
||||||
|
int port = 40000;
|
||||||
|
if (!IsRunningOnGvisor()) {
|
||||||
|
port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
|
||||||
|
PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
FileDescriptor recv_sock =
|
||||||
|
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
|
||||||
|
|
||||||
|
FileDescriptor send_sock =
|
||||||
|
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
|
||||||
|
|
||||||
|
// Enable IP_HDRINCL option so that we can build and send w/ an IP
|
||||||
|
// header.
|
||||||
|
constexpr int kSockOptOn = 1;
|
||||||
|
ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
|
||||||
|
sizeof(kSockOptOn)),
|
||||||
|
SyscallSucceeds());
|
||||||
|
// This is not strictly required but we do it to make sure that setting
|
||||||
|
// IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving
|
||||||
|
// packets.
|
||||||
|
ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
|
||||||
|
sizeof(kSockOptOn)),
|
||||||
|
SyscallSucceeds());
|
||||||
|
|
||||||
|
// Construct a packet with an IP header, UDP header, and payload.
|
||||||
|
constexpr char kPayload[] = "toto";
|
||||||
|
char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
|
||||||
|
ASSERT_TRUE(
|
||||||
|
FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
|
||||||
|
|
||||||
|
socklen_t addrlen = sizeof(addr_);
|
||||||
|
ASSERT_NO_FATAL_FAILURE(sendto(send_sock.get(), &packet, sizeof(packet), 0,
|
||||||
|
reinterpret_cast<struct sockaddr*>(&addr_),
|
||||||
|
addrlen));
|
||||||
|
|
||||||
|
// Receive the payload.
|
||||||
|
char recv_buf[sizeof(packet)];
|
||||||
|
struct sockaddr_in src;
|
||||||
|
socklen_t src_size = sizeof(src);
|
||||||
|
ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0,
|
||||||
|
reinterpret_cast<struct sockaddr*>(&src), &src_size),
|
||||||
|
SyscallSucceedsWithValue(sizeof(packet)));
|
||||||
|
EXPECT_EQ(
|
||||||
|
memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
|
||||||
|
sizeof(kPayload)),
|
||||||
|
0);
|
||||||
|
// The network stack should have set the source address.
|
||||||
|
EXPECT_EQ(src.sin_family, AF_INET);
|
||||||
|
EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
|
||||||
|
struct iphdr iphdr = {};
|
||||||
|
memcpy(&iphdr, recv_buf, sizeof(iphdr));
|
||||||
|
EXPECT_NE(iphdr.id, 0);
|
||||||
|
|
||||||
|
// Also verify that the packet we just sent was not delivered to the
|
||||||
|
// IPPROTO_RAW socket.
|
||||||
|
{
|
||||||
|
char recv_buf[sizeof(packet)];
|
||||||
|
struct sockaddr_in src;
|
||||||
|
socklen_t src_size = sizeof(src);
|
||||||
|
ASSERT_THAT(recvfrom(socket_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
|
||||||
|
reinterpret_cast<struct sockaddr*>(&src), &src_size),
|
||||||
|
SyscallFailsWithErrno(EAGAIN));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
} // namespace testing
|
} // namespace testing
|
||||||
|
|
Loading…
Reference in New Issue