Allow for zero byte iovec with MSG_PEEK | MSG_TRUNC in recvmsg.
This allows for peeking at the length of the next message on a netlink socket without pulling it off the socket's buffer/queue, allowing tools like 'ip' to work. This CL also fixes an issue where dump_done_errno was not included in the NLMSG_DONE messages payload. Issue #769 PiperOrigin-RevId: 274068637
This commit is contained in:
parent
c7e901f47a
commit
470997ca99
|
@ -21,6 +21,7 @@ go_library(
|
|||
"//pkg/sentry/fs/fsutil",
|
||||
"//pkg/sentry/kernel",
|
||||
"//pkg/sentry/kernel/time",
|
||||
"//pkg/sentry/safemem",
|
||||
"//pkg/sentry/socket",
|
||||
"//pkg/sentry/socket/netlink/port",
|
||||
"//pkg/sentry/socket/unix",
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
||||
"gvisor.dev/gvisor/pkg/sentry/kernel"
|
||||
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
|
||||
"gvisor.dev/gvisor/pkg/sentry/safemem"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
|
||||
|
@ -416,6 +417,24 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have
|
|||
Peek: flags&linux.MSG_PEEK != 0,
|
||||
}
|
||||
|
||||
// If MSG_TRUNC is set with a zero byte destination then we still need
|
||||
// to read the message and discard it, or in the case where MSG_PEEK is
|
||||
// set, leave it be. In both cases the full message length must be
|
||||
// returned. However, the memory manager for the destination will not read
|
||||
// the endpoint if the destination is zero length.
|
||||
//
|
||||
// In order for the endpoint to be read when the destination size is zero,
|
||||
// we must cause a read of the endpoint by using a separate fake zero
|
||||
// length block sequence and calling the EndpointReader directly.
|
||||
if trunc && dst.Addrs.NumBytes() == 0 {
|
||||
// Perform a read to a zero byte block sequence. We can ignore the
|
||||
// original destination since it was zero bytes. The length returned by
|
||||
// ReadToBlocks is ignored and we return the full message length to comply
|
||||
// with MSG_TRUNC.
|
||||
_, err := r.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(make([]byte, 0))))
|
||||
return int(r.MsgSize), linux.MSG_TRUNC, from, fromLen, socket.ControlMessages{}, syserr.FromError(err)
|
||||
}
|
||||
|
||||
if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
|
||||
var mflags int
|
||||
if n < int64(r.MsgSize) {
|
||||
|
@ -499,6 +518,9 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error
|
|||
PortID: uint32(ms.PortID),
|
||||
})
|
||||
|
||||
// Add the dump_done_errno payload.
|
||||
m.Put(int64(0))
|
||||
|
||||
_, notify, err := s.connection.Send([][]byte{m.Finalize()}, transport.ControlMessages{}, tcpip.FullAddress{})
|
||||
if err != nil && err != syserr.ErrWouldBlock {
|
||||
return err
|
||||
|
|
|
@ -539,6 +539,159 @@ TEST(NetlinkRouteTest, GetRouteDump) {
|
|||
EXPECT_TRUE(dstFound);
|
||||
}
|
||||
|
||||
// RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output
|
||||
// buffer. MSG_TRUNC with a zero length buffer should consume subsequent
|
||||
// messages off the socket.
|
||||
TEST(NetlinkRouteTest, RecvmsgTrunc) {
|
||||
FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
|
||||
|
||||
struct request {
|
||||
struct nlmsghdr hdr;
|
||||
struct rtgenmsg rgm;
|
||||
};
|
||||
|
||||
constexpr uint32_t kSeq = 12345;
|
||||
|
||||
struct request req;
|
||||
req.hdr.nlmsg_len = sizeof(req);
|
||||
req.hdr.nlmsg_type = RTM_GETADDR;
|
||||
req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
|
||||
req.hdr.nlmsg_seq = kSeq;
|
||||
req.rgm.rtgen_family = AF_UNSPEC;
|
||||
|
||||
struct iovec iov = {};
|
||||
iov.iov_base = &req;
|
||||
iov.iov_len = sizeof(req);
|
||||
|
||||
struct msghdr msg = {};
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
|
||||
ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
|
||||
|
||||
iov.iov_base = NULL;
|
||||
iov.iov_len = 0;
|
||||
|
||||
int trunclen, trunclen2;
|
||||
|
||||
// Note: This test assumes at least two messages are returned by the
|
||||
// RTM_GETADDR request. That means at least one RTM_NEWLINK message and one
|
||||
// NLMSG_DONE message. We cannot read all the messages without blocking
|
||||
// because we would need to read the message into a buffer and check the
|
||||
// nlmsg_type for NLMSG_DONE. However, the test depends on reading into a
|
||||
// zero-length buffer.
|
||||
|
||||
// First, call recvmsg with MSG_TRUNC. This will read the full message from
|
||||
// the socket and return it's full length. Subsequent calls to recvmsg will
|
||||
// read the next messages from the socket.
|
||||
ASSERT_THAT(trunclen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC),
|
||||
SyscallSucceeds());
|
||||
|
||||
// Message should always be truncated. However, While the destination iov is
|
||||
// zero length, MSG_TRUNC returns the size of the next message so it should
|
||||
// not be zero.
|
||||
ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
|
||||
ASSERT_NE(trunclen, 0);
|
||||
// Returned length is at least the header and ifaddrmsg.
|
||||
EXPECT_GE(trunclen, sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg));
|
||||
|
||||
// Reset the msg_flags to make sure that the recvmsg call is setting them
|
||||
// properly.
|
||||
msg.msg_flags = 0;
|
||||
|
||||
// Make a second recvvmsg call to get the next message.
|
||||
ASSERT_THAT(trunclen2 = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC),
|
||||
SyscallSucceeds());
|
||||
ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
|
||||
ASSERT_NE(trunclen2, 0);
|
||||
|
||||
// Assert that the received messages are not the same.
|
||||
//
|
||||
// We are calling recvmsg with a zero length buffer so we have no way to
|
||||
// inspect the messages to make sure they are not equal in value. The best
|
||||
// we can do is to compare their lengths.
|
||||
ASSERT_NE(trunclen, trunclen2);
|
||||
}
|
||||
|
||||
// RecvmsgTruncPeek tests recvmsg with the combination of the MSG_TRUNC and
|
||||
// MSG_PEEK flags and a zero length output buffer. This is normally used to
|
||||
// read the full length of the next message on the socket without consuming
|
||||
// it, so a properly sized buffer can be allocated to store the message. This
|
||||
// test tests that scenario.
|
||||
TEST(NetlinkRouteTest, RecvmsgTruncPeek) {
|
||||
FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
|
||||
|
||||
struct request {
|
||||
struct nlmsghdr hdr;
|
||||
struct rtgenmsg rgm;
|
||||
};
|
||||
|
||||
constexpr uint32_t kSeq = 12345;
|
||||
|
||||
struct request req;
|
||||
req.hdr.nlmsg_len = sizeof(req);
|
||||
req.hdr.nlmsg_type = RTM_GETADDR;
|
||||
req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
|
||||
req.hdr.nlmsg_seq = kSeq;
|
||||
req.rgm.rtgen_family = AF_UNSPEC;
|
||||
|
||||
struct iovec iov = {};
|
||||
iov.iov_base = &req;
|
||||
iov.iov_len = sizeof(req);
|
||||
|
||||
struct msghdr msg = {};
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
|
||||
ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
|
||||
|
||||
int type = -1;
|
||||
do {
|
||||
int peeklen;
|
||||
int len;
|
||||
|
||||
iov.iov_base = NULL;
|
||||
iov.iov_len = 0;
|
||||
|
||||
// Call recvmsg with MSG_PEEK and MSG_TRUNC. This will peek at the message
|
||||
// and return it's full length.
|
||||
// See: MSG_TRUNC http://man7.org/linux/man-pages/man2/recv.2.html
|
||||
ASSERT_THAT(
|
||||
peeklen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_PEEK | MSG_TRUNC),
|
||||
SyscallSucceeds());
|
||||
|
||||
// Message should always be truncated.
|
||||
ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
|
||||
ASSERT_NE(peeklen, 0);
|
||||
|
||||
// Reset the message flags for the next call.
|
||||
msg.msg_flags = 0;
|
||||
|
||||
// Make the actual call to recvmsg to get the actual data. We will use
|
||||
// the length returned from the peek call for the allocated buffer size..
|
||||
std::vector<char> buf(peeklen);
|
||||
iov.iov_base = buf.data();
|
||||
iov.iov_len = buf.size();
|
||||
ASSERT_THAT(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0),
|
||||
SyscallSucceeds());
|
||||
|
||||
// Message should not be truncated since we allocated the correct buffer
|
||||
// size.
|
||||
EXPECT_NE(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
|
||||
|
||||
// MSG_PEEK should have left data on the socket and the subsequent call
|
||||
// with should have retrieved the same data. Both calls should have
|
||||
// returned the message's full length so they should be equal.
|
||||
ASSERT_NE(len, 0);
|
||||
ASSERT_EQ(peeklen, len);
|
||||
|
||||
for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
|
||||
NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
|
||||
type = hdr->nlmsg_type;
|
||||
}
|
||||
} while (type != NLMSG_DONE && type != NLMSG_ERROR);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace testing
|
||||
|
|
|
@ -91,6 +91,13 @@ PosixError NetlinkRequestResponse(
|
|||
NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
|
||||
fn(hdr);
|
||||
type = hdr->nlmsg_type;
|
||||
// Done should include an integer payload for dump_done_errno.
|
||||
// See net/netlink/af_netlink.c:netlink_dump
|
||||
// Some tools like the 'ip' tool check the minimum length of the
|
||||
// NLMSG_DONE message.
|
||||
if (type == NLMSG_DONE) {
|
||||
EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int)));
|
||||
}
|
||||
}
|
||||
} while (type != NLMSG_DONE && type != NLMSG_ERROR);
|
||||
|
||||
|
|
Loading…
Reference in New Issue