gvisor/test/syscalls/linux/tuntap.cc

347 lines
11 KiB
C++

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <arpa/inet.h>
#include <linux/capability.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_split.h"
#include "test/syscalls/linux/socket_netlink_route_util.h"
#include "test/syscalls/linux/socket_test_util.h"
#include "test/util/capability_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
#include "test/util/posix_error.h"
#include "test/util/test_util.h"
namespace gvisor {
namespace testing {
namespace {
// Length in bytes of an IPv4 address as stored in the ARP packet fields.
constexpr int kIPLen = 4;
// Path of the TUN/TAP device node opened by every test in this file.
constexpr char kDevNetTun[] = "/dev/net/tun";
// Interface name requested by the tests that create a fixed-name TAP device.
constexpr char kTapName[] = "tap0";
// Hardware addresses used by the ping test: kMacA is assigned to the TAP
// interface, kMacB plays the role of the remote peer.
constexpr uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA};
constexpr uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB};
// Collects the `name` of every link returned by DumpLinks() into an ordered
// set. A DumpLinks() failure is handed back to the caller through
// ASSIGN_OR_RETURN_ERRNO.
PosixErrorOr<std::set<std::string>> DumpLinkNames() {
  ASSIGN_OR_RETURN_ERRNO(auto all_links, DumpLinks());

  std::set<std::string> link_names;
  for (const auto& entry : all_links) {
    link_names.emplace(entry.name);
  }
  return link_names;
}
// Looks up the first link from DumpLinks() whose name equals `name`.
//
// Returns an engaged optional holding a copy of that link, or an empty
// optional when no link matches. A DumpLinks() failure is handed back to the
// caller through ASSIGN_OR_RETURN_ERRNO.
PosixErrorOr<absl::optional<Link>> GetLinkByName(const std::string& name) {
  ASSIGN_OR_RETURN_ERRNO(auto all_links, DumpLinks());

  for (const auto& candidate : all_links) {
    if (!(candidate.name == name)) {
      continue;
    }
    return absl::optional<Link>(candidate);
  }

  // Nothing matched.
  return absl::optional<Link>();
}
// Packet-information header placed in front of the Ethernet header in every
// frame this test writes to or reads from the TAP fd (see ping_pkt and
// arp_pkt). Packed so the two 16-bit fields are laid out without padding.
struct pihdr {
// Never set explicitly by the packet builders in this file, so it stays zero.
uint16_t pi_flags;
// Ethernet protocol of the frame that follows, stored via htons()
// (ETH_P_IP for pings, ETH_P_ARP for ARP).
uint16_t pi_protocol;
} __attribute__((packed));
// Byte layout of the ICMP echo packets exchanged over the TAP fd: the
// packet-info header, then Ethernet, IPv4 and ICMP headers, then a fixed-size
// payload. Packed so the struct can be passed directly to read()/write().
struct ping_pkt {
pihdr pi;
struct ethhdr eth;
struct iphdr ip;
struct icmphdr icmp;
// Echo payload; CreatePingPacket() stores "abcd" here, zero padded.
char payload[64];
} __attribute__((packed));
// Builds an ICMP echo request wrapped in IPv4, Ethernet and packet-info
// headers.
//
// srcmac/dstmac are ETH_ALEN-byte hardware addresses copied into the Ethernet
// header. srcip/dstip are dotted-quad strings parsed with inet_pton(AF_INET).
// The payload is "abcd" followed by zero padding, and both the IP and ICMP
// checksums are filled in before the packet is returned.
ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
                          const uint8_t dstmac[ETH_ALEN], const char* dstip) {
  // Zero-initialize so every field not set below (including the checksum
  // fields while the checksums are being computed) is 0.
  ping_pkt pkt = {};

  pkt.pi.pi_protocol = htons(ETH_P_IP);

  memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest));
  memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source));
  pkt.eth.h_proto = htons(ETH_P_IP);

  pkt.ip.ihl = 5;
  pkt.ip.version = 4;
  pkt.ip.tos = 0;
  pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) +
                         sizeof(pkt.payload));
  pkt.ip.id = 1;
  // frag_off is carried in network byte order. htons(IP_DF) sets the
  // "don't fragment" flag on any host, whereas a raw `1 << 6` only lands on
  // that bit when the host is little-endian.
  pkt.ip.frag_off = htons(IP_DF);
  pkt.ip.ttl = 64;
  pkt.ip.protocol = IPPROTO_ICMP;
  inet_pton(AF_INET, dstip, &pkt.ip.daddr);
  inet_pton(AF_INET, srcip, &pkt.ip.saddr);
  pkt.ip.check = IPChecksum(pkt.ip);

  pkt.icmp.type = ICMP_ECHO;
  pkt.icmp.code = 0;
  pkt.icmp.checksum = 0;
  pkt.icmp.un.echo.sequence = 1;
  pkt.icmp.un.echo.id = 1;

  // strncpy pads the remainder of the payload with NUL bytes.
  strncpy(pkt.payload, "abcd", sizeof(pkt.payload));
  pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload));

  return pkt;
}
// Byte layout of the ARP packets exchanged over the TAP fd: the packet-info
// header, the Ethernet header, the fixed ARP header, then the four address
// fields for Ethernet/IPv4. Packed so the struct maps directly onto the bytes
// passed to read()/write().
struct arp_pkt {
pihdr pi;
struct ethhdr eth;
struct arphdr arp;
// Sender hardware (MAC) address.
uint8_t arp_sha[ETH_ALEN];
// Sender protocol (IPv4) address.
uint8_t arp_spa[kIPLen];
// Target hardware (MAC) address.
uint8_t arp_tha[ETH_ALEN];
// Target protocol (IPv4) address.
uint8_t arp_tpa[kIPLen];
} __attribute__((packed));
// Builds an ARP reply (ARPOP_REPLY) for IPv4 over Ethernet, prefixed with the
// packet-info header, and returns its raw bytes.
//
// srcmac/srcip describe the sender and dstmac/dstip the target. Both the
// Ethernet header and the ARP body are filled from these arguments.
// srcip/dstip are dotted-quad strings parsed with inet_pton(AF_INET).
std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
                            const uint8_t dstmac[ETH_ALEN], const char* dstip) {
  // Zero-filled buffer of exactly one packet; fields not set below stay 0.
  std::string buffer(sizeof(arp_pkt), '\0');
  arp_pkt* pkt = reinterpret_cast<arp_pkt*>(&buffer[0]);

  pkt->pi.pi_protocol = htons(ETH_P_ARP);

  // Address the frame with the caller-supplied MACs (not fixed constants) so
  // the Ethernet header always agrees with the ARP body below.
  memcpy(pkt->eth.h_dest, dstmac, sizeof(pkt->eth.h_dest));
  memcpy(pkt->eth.h_source, srcmac, sizeof(pkt->eth.h_source));
  pkt->eth.h_proto = htons(ETH_P_ARP);

  pkt->arp.ar_hrd = htons(ARPHRD_ETHER);
  pkt->arp.ar_pro = htons(ETH_P_IP);
  pkt->arp.ar_hln = ETH_ALEN;
  pkt->arp.ar_pln = kIPLen;
  pkt->arp.ar_op = htons(ARPOP_REPLY);

  memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha));
  inet_pton(AF_INET, srcip, pkt->arp_spa);
  memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha));
  inet_pton(AF_INET, dstip, pkt->arp_tpa);

  return buffer;
}
} // namespace
class TuntapTest : public ::testing::Test {
protected:
void TearDown() override {
if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))) {
// Bring back capability if we had dropped it in test case.
ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, true));
}
}
};
// Creating a TAP interface after dropping CAP_NET_ADMIN must fail with EPERM.
TEST_F(TuntapTest, CreateInterfaceNoCap) {
  // The capability has to be present in order to be dropped.
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
  ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, false));

  FileDescriptor tun_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  struct ifreq request = {};
  strncpy(request.ifr_name, kTapName, IFNAMSIZ);
  request.ifr_flags = IFF_TAP;

  EXPECT_THAT(ioctl(tun_fd.get(), TUNSETIFF, &request),
              SyscallFailsWithErrno(EPERM));
}
// Creates a TAP interface named kTapName and verifies that TUNGETIFF reports
// the same request back and that the link shows up in the link dump.
TEST_F(TuntapTest, CreateFixedNameInterface) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));

  FileDescriptor tun_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  // Ask for a TAP device with an explicit name.
  struct ifreq requested = {};
  requested.ifr_flags = IFF_TAP;
  strncpy(requested.ifr_name, kTapName, IFNAMSIZ);
  EXPECT_THAT(ioctl(tun_fd.get(), TUNSETIFF, &requested),
              SyscallSucceedsWithValue(0));

  // Read the configuration back from the same fd.
  struct ifreq reported = {};
  EXPECT_THAT(ioctl(tun_fd.get(), TUNGETIFF, &reported),
              SyscallSucceedsWithValue(0));

  // The reported flags additionally carry IFF_NOFILTER; see __tun_chr_ioctl()
  // in drivers/net/tun.c.
  struct ifreq expected = requested;
  expected.ifr_flags |= IFF_NOFILTER;

  EXPECT_THAT(DumpLinkNames(),
              IsPosixErrorOkAndHolds(::testing::Contains(kTapName)));
  EXPECT_THAT(memcmp(&expected, &reported, sizeof(reported)),
              ::testing::Eq(0));
}
// Creates a TAP interface without naming it and verifies that the assigned
// name starts with "tap" and appears in the link dump.
TEST_F(TuntapTest, CreateInterface) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));

  FileDescriptor tun_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  // ifr_name is left empty so the kernel picks the name.
  struct ifreq request = {};
  request.ifr_flags = IFF_TAP;
  EXPECT_THAT(ioctl(tun_fd.get(), TUNSETIFF, &request),
              SyscallSucceedsWithValue(0));

  struct ifreq reported = {};
  EXPECT_THAT(ioctl(tun_fd.get(), TUNGETIFF, &reported),
              SyscallSucceedsWithValue(0));

  std::string assigned_name(reported.ifr_name);
  EXPECT_THAT(assigned_name, ::testing::StartsWith("tap"));
  EXPECT_THAT(DumpLinkNames(),
              IsPosixErrorOkAndHolds(::testing::Contains(assigned_name)));
}
// read() and write() on a /dev/net/tun fd that has not been attached to an
// interface (no TUNSETIFF issued) must fail with EBADFD.
TEST_F(TuntapTest, InvalidReadWrite) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));

  FileDescriptor tun_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  char scratch[128] = {};
  EXPECT_THAT(read(tun_fd.get(), scratch, sizeof(scratch)),
              SyscallFailsWithErrno(EBADFD));
  EXPECT_THAT(write(tun_fd.get(), scratch, sizeof(scratch)),
              SyscallFailsWithErrno(EBADFD));
}
// Writing to a freshly created TAP device that has not been brought up must
// fail with EIO.
TEST_F(TuntapTest, WriteToDownDevice) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));

  // FIXME: gVisor always creates enabled/up'd interfaces.
  SKIP_IF(IsRunningOnGvisor());

  FileDescriptor tun_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  // A newly created device is expected to be down.
  struct ifreq request = {};
  request.ifr_flags = IFF_TAP;
  EXPECT_THAT(ioctl(tun_fd.get(), TUNSETIFF, &request),
              SyscallSucceedsWithValue(0));

  char scratch[128] = {};
  EXPECT_THAT(write(tun_fd.get(), scratch, sizeof(scratch)),
              SyscallFailsWithErrno(EIO));
}
// This test sets up a TAP device and pings kernel by sending ICMP echo request.
//
// It works as the following:
// * Open /dev/net/tun, and create kTapName interface.
// * Use rtnetlink to do initial setup of the interface:
// * Assign IP address 10.0.0.1/24 to kernel.
// * MAC address: kMacA
// * Bring up the interface.
// * Send an ICMP echo request (ping) packet from 10.0.0.2 (kMacB) to kernel.
// * Loop to receive packets from TAP device/fd:
// * If packet is an ICMP echo reply, it stops and passes the test.
// * If packet is an ARP request, it responds with canned reply and resends
// the ICMP request packet.
// Pings the kernel through a TAP device: creates kTapName, gives it
// 10.0.0.1/24, sends an ICMP echo request from 10.0.0.2 (kMacB), answers any
// ARP packet with a canned reply, and finishes once the echo reply arrives.
//
// NOTE: the receive loop has no timeout; it ends only on an echo reply or on a
// failed read().
TEST_F(TuntapTest, PingKernel) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));

  // Interface creation.
  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));

  struct ifreq ifr_set = {};
  ifr_set.ifr_flags = IFF_TAP;
  strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ);
  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set),
              SyscallSucceedsWithValue(0));

  absl::optional<Link> link =
      ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTapName));
  ASSERT_TRUE(link.has_value());

  // Interface setup.
  struct in_addr addr = {};
  // inet_pton() returns 1 only when the text was parsed successfully.
  ASSERT_EQ(inet_pton(AF_INET, "10.0.0.1", &addr), 1);
  EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24,
                                   &addr, sizeof(addr)));

  if (!IsRunningOnGvisor()) {
    // FIXME: gVisor doesn't support setting MAC address on interfaces yet.
    EXPECT_NO_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA)));

    // FIXME: gVisor always creates enabled/up'd interfaces.
    EXPECT_NO_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP));
  }

  ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");
  std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");

  // Send ping, this would trigger an ARP request on Linux.
  EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
              SyscallSucceedsWithValue(sizeof(ping_req)));

  // Receive loop to process inbound packets. The union lets one read buffer be
  // viewed as whichever packet type arrived.
  struct inpkt {
    union {
      pihdr pi;
      ping_pkt ping;
      arp_pkt arp;
    };
  };
  while (true) {
    inpkt r = {};
    const ssize_t n = read(fd.get(), &r, sizeof(r));
    // A failed read must end the test. With a non-fatal check, -1 would be
    // converted to a huge unsigned value in the size comparisons below and the
    // loop would never terminate.
    ASSERT_THAT(n, SyscallSucceeds());
    // n is non-negative from here on, so the conversion is lossless.
    const size_t len = static_cast<size_t>(n);

    if (len < sizeof(pihdr)) {
      std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
                << " len: " << n << std::endl;
      continue;
    }

    // Process ARP packet.
    if (len >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
      // Respond with canned ARP reply.
      EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()),
                  SyscallSucceedsWithValue(arp_rep.size()));
      // First ping request might have been dropped due to mac address not in
      // ARP cache. Send it again.
      EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
                  SyscallSucceedsWithValue(sizeof(ping_req)));
    }

    // Process ping response packet: same protocol as the request, with source
    // and destination addresses swapped, and ICMP type "echo reply".
    if (len >= sizeof(ping_pkt) &&
        r.pi.pi_protocol == ping_req.pi.pi_protocol &&
        r.ping.ip.protocol == ping_req.ip.protocol &&
        !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) &&
        !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) &&
        r.ping.icmp.type == ICMP_ECHOREPLY && r.ping.icmp.code == 0) {
      // Ends and passes the test.
      break;
    }
  }
}
} // namespace testing
} // namespace gvisor