Add support for IP_HDRINCL IP option for raw sockets.
Updates #2746 Fixes #3158 PiperOrigin-RevId: 320497190
This commit is contained in:
parent
e506fcd931
commit
5946f11182
|
@ -2112,13 +2112,22 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
|
|||
}
|
||||
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0))
|
||||
|
||||
case linux.IP_HDRINCL:
|
||||
if len(optVal) == 0 {
|
||||
return nil
|
||||
}
|
||||
v, err := parseIntOrChar(optVal)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0))
|
||||
|
||||
case linux.IP_ADD_SOURCE_MEMBERSHIP,
|
||||
linux.IP_BIND_ADDRESS_NO_PORT,
|
||||
linux.IP_BLOCK_SOURCE,
|
||||
linux.IP_CHECKSUM,
|
||||
linux.IP_DROP_SOURCE_MEMBERSHIP,
|
||||
linux.IP_FREEBIND,
|
||||
linux.IP_HDRINCL,
|
||||
linux.IP_IPSEC_POLICY,
|
||||
linux.IP_MINTTL,
|
||||
linux.IP_MSFILTER,
|
||||
|
|
|
@ -648,6 +648,11 @@ const (
|
|||
// whether an IPv6 socket is to be restricted to sending and receiving
|
||||
// IPv6 packets only.
|
||||
V6OnlyOption
|
||||
|
||||
// IPHdrIncludedOption is used by {G,S}etSockOptBool to indicate for a raw
|
||||
// endpoint that all packets being written have an IP header and the
|
||||
// endpoint should not attach an IP header.
|
||||
IPHdrIncludedOption
|
||||
)
|
||||
|
||||
// SockOptInt represents socket options whose values have the int type.
|
||||
|
|
|
@ -63,6 +63,7 @@ type endpoint struct {
|
|||
stack *stack.Stack `state:"manual"`
|
||||
waiterQueue *waiter.Queue
|
||||
associated bool
|
||||
hdrIncluded bool
|
||||
|
||||
// The following fields are used to manage the receive queue and are
|
||||
// protected by rcvMu.
|
||||
|
@ -108,6 +109,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
|
|||
rcvBufSizeMax: 32 * 1024,
|
||||
sndBufSizeMax: 32 * 1024,
|
||||
associated: associated,
|
||||
hdrIncluded: !associated,
|
||||
}
|
||||
|
||||
// Override with stack defaults.
|
||||
|
@ -182,10 +184,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
|
|||
|
||||
// Read implements tcpip.Endpoint.Read.
|
||||
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
|
||||
if !e.associated {
|
||||
return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue
|
||||
}
|
||||
|
||||
e.rcvMu.Lock()
|
||||
|
||||
// If there's no data to read, return that read would block or that the
|
||||
|
@ -263,7 +261,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
|
|||
|
||||
// If this is an unassociated socket and caller provided a nonzero
|
||||
// destination address, route using that address.
|
||||
if !e.associated {
|
||||
if e.hdrIncluded {
|
||||
ip := header.IPv4(payloadBytes)
|
||||
if !ip.IsValid(len(payloadBytes)) {
|
||||
e.mu.RUnlock()
|
||||
|
@ -353,7 +351,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
|
|||
}
|
||||
}
|
||||
|
||||
if !e.associated {
|
||||
if e.hdrIncluded {
|
||||
if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
|
||||
Data: buffer.View(payloadBytes).ToVectorisedView(),
|
||||
}); err != nil {
|
||||
|
@ -513,6 +511,13 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
|
|||
|
||||
// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
|
||||
func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
|
||||
switch opt {
|
||||
case tcpip.IPHdrIncludedOption:
|
||||
e.mu.Lock()
|
||||
e.hdrIncluded = v
|
||||
e.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
return tcpip.ErrUnknownProtocolOption
|
||||
}
|
||||
|
||||
|
@ -577,6 +582,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
|
|||
case tcpip.KeepaliveEnabledOption:
|
||||
return false, nil
|
||||
|
||||
case tcpip.IPHdrIncludedOption:
|
||||
e.mu.Lock()
|
||||
v := e.hdrIncluded
|
||||
e.mu.Unlock()
|
||||
return v, nil
|
||||
|
||||
default:
|
||||
return false, tcpip.ErrUnknownProtocolOption
|
||||
}
|
||||
|
@ -616,8 +627,15 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
|
|||
func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
|
||||
e.rcvMu.Lock()
|
||||
|
||||
// Drop the packet if our buffer is currently full.
|
||||
if e.rcvClosed {
|
||||
// Drop the packet if the receiver is closed or if this is an unassociated
|
||||
// endpoint (i.e. an endpoint created w/ IPPROTO_RAW). Such endpoints are send-only.
|
||||
// See: https://man7.org/linux/man-pages/man7/raw.7.html
|
||||
//
|
||||
// An IPPROTO_RAW socket is send only. If you really want to receive
|
||||
// all IP packets, use a packet(7) socket with the ETH_P_IP protocol.
|
||||
// Note that packet sockets don't reassemble IP fragments, unlike raw
|
||||
// sockets.
|
||||
if e.rcvClosed || !e.associated {
|
||||
e.rcvMu.Unlock()
|
||||
e.stack.Stats().DroppedPackets.Increment()
|
||||
e.stats.ReceiveErrors.ClosedReceiver.Increment()
|
||||
|
|
|
@ -167,7 +167,7 @@ TEST_F(RawHDRINCL, NotReadable) {
|
|||
// nothing to be read.
|
||||
char buf[117];
|
||||
ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT),
|
||||
SyscallFailsWithErrno(EINVAL));
|
||||
SyscallFailsWithErrno(EAGAIN));
|
||||
}
|
||||
|
||||
// Test that we can connect() to a valid IP (loopback).
|
||||
|
@ -332,6 +332,74 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
|
|||
EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);
|
||||
}
|
||||
|
||||
// Send and receive a packet w/ the IP_HDRINCL option set.
|
||||
TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) {
|
||||
int port = 40000;
|
||||
if (!IsRunningOnGvisor()) {
|
||||
port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
|
||||
PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
|
||||
}
|
||||
|
||||
FileDescriptor recv_sock =
|
||||
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
|
||||
|
||||
FileDescriptor send_sock =
|
||||
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
|
||||
|
||||
// Enable IP_HDRINCL option so that we can build and send w/ an IP
|
||||
// header.
|
||||
constexpr int kSockOptOn = 1;
|
||||
ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
|
||||
sizeof(kSockOptOn)),
|
||||
SyscallSucceeds());
|
||||
// This is not strictly required but we do it to make sure that setting
|
||||
// IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving
|
||||
// packets.
|
||||
ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
|
||||
sizeof(kSockOptOn)),
|
||||
SyscallSucceeds());
|
||||
|
||||
// Construct a packet with an IP header, UDP header, and payload.
|
||||
constexpr char kPayload[] = "toto";
|
||||
char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
|
||||
ASSERT_TRUE(
|
||||
FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
|
||||
|
||||
socklen_t addrlen = sizeof(addr_);
|
||||
ASSERT_NO_FATAL_FAILURE(sendto(send_sock.get(), &packet, sizeof(packet), 0,
|
||||
reinterpret_cast<struct sockaddr*>(&addr_),
|
||||
addrlen));
|
||||
|
||||
// Receive the payload.
|
||||
char recv_buf[sizeof(packet)];
|
||||
struct sockaddr_in src;
|
||||
socklen_t src_size = sizeof(src);
|
||||
ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0,
|
||||
reinterpret_cast<struct sockaddr*>(&src), &src_size),
|
||||
SyscallSucceedsWithValue(sizeof(packet)));
|
||||
EXPECT_EQ(
|
||||
memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
|
||||
sizeof(kPayload)),
|
||||
0);
|
||||
// The network stack should have set the source address.
|
||||
EXPECT_EQ(src.sin_family, AF_INET);
|
||||
EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
|
||||
struct iphdr iphdr = {};
|
||||
memcpy(&iphdr, recv_buf, sizeof(iphdr));
|
||||
EXPECT_NE(iphdr.id, 0);
|
||||
|
||||
// Also verify that the packet we just sent was not delivered to the
|
||||
// IPPROTO_RAW socket.
|
||||
{
|
||||
char recv_buf[sizeof(packet)];
|
||||
struct sockaddr_in src;
|
||||
socklen_t src_size = sizeof(src);
|
||||
ASSERT_THAT(recvfrom(socket_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
|
||||
reinterpret_cast<struct sockaddr*>(&src), &src_size),
|
||||
SyscallFailsWithErrno(EAGAIN));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace testing
|
||||
|
|
Loading…
Reference in New Issue