diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 333e0042e..8f0f5466e 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -50,5 +50,6 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/usermem", "//pkg/waiter", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 60df51dae..e1e0c5931 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -33,6 +33,7 @@ import ( "syscall" "time" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/binary" @@ -719,6 +720,14 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool defer s.EventUnregister(&e) if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting { + if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM { + // TCP unlike UDP returns EADDRNOTAVAIL when it can't + // find an available local ephemeral port. + if err == tcpip.ErrNoPortAvailable { + return syserr.ErrAddressNotAvailable + } + } + return syserr.TranslateNetstackError(err) } diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 3bc72bc19..57e0a069b 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -506,6 +506,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { nicID := addr.NIC localPort := uint16(0) switch e.state { + case stateInitial: case stateBound, stateConnected: localPort = e.ID.LocalPort if e.BindNICID == 0 { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3406a2de8..d68afbe44 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -400,6 +400,14 @@ syscall_test( vfs2 = "True", ) +syscall_test( + size = "medium", + # Takes too long under gotsan to run. + tags = ["nogotsan"], + test = "//test/syscalls/linux:ping_socket_test", + vfs2 = "True", +) + syscall_test( size = "large", add_overlay = True, @@ -697,6 +705,14 @@ syscall_test( test = "//test/syscalls/linux:socket_inet_loopback_test", ) +syscall_test( + size = "large", + shard_count = 50, + # Takes too long for TSAN. Creates a lot of TCP sockets. + tags = ["nogotsan"], + test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test", +) + syscall_test( size = "large", shard_count = 50, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index f4b5de18d..ae2aa44dc 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1411,6 +1411,21 @@ cc_binary( ], ) +cc_binary( + name = "ping_socket_test", + testonly = 1, + srcs = ["ping_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_binary( name = "pipe_test", testonly = 1, @@ -2780,6 +2795,26 @@ cc_binary( ], ) +cc_binary( + name = "socket_inet_loopback_nogotsan_test", + testonly = 1, + srcs = ["socket_inet_loopback_nogotsan.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + cc_binary( name = "socket_netlink_test", testonly = 1, diff --git a/test/syscalls/linux/ping_socket.cc b/test/syscalls/linux/ping_socket.cc new file mode 100644 index 000000000..a9bfdb37b --- /dev/null +++ b/test/syscalls/linux/ping_socket.cc @@ -0,0 +1,91 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class PingSocket : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // The loopback address. + struct sockaddr_in addr_; +}; + +void PingSocket::SetUp() { + // On some hosts ping sockets are restricted to specific groups using the + // sysctl "ping_group_range". + int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + if (s < 0 && errno == EPERM) { + GTEST_SKIP(); + } + close(s); + + addr_ = {}; + // Just a random port as the destination port number is irrelevant for ping + // sockets. + addr_.sin_port = 12345; + addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_.sin_family = AF_INET; +} + +void PingSocket::TearDown() {} + +// Test ICMP port exhaustion returns EAGAIN. +// +// We disable both random/cooperative S/R for this test as it makes way too many +// syscalls. +TEST_F(PingSocket, ICMPPortExhaustion_NoRandomSave) { + DisableSave ds; + std::vector sockets; + constexpr int kSockets = 65536; + addr_.sin_port = 0; + for (int i = 0; i < kSockets; i++) { + auto s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + int ret = connect(s.get(), reinterpret_cast(&addr_), + sizeof(addr_)); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc index 1e35a4a8b..7a316427d 100644 --- a/test/syscalls/linux/poll.cc +++ b/test/syscalls/linux/poll.cc @@ -259,9 +259,9 @@ TEST_F(PollTest, Nfds) { TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0); // gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE. - constexpr rlim_t gVisorMax = 1048576; - if (rlim.rlim_cur > gVisorMax) { - rlim.rlim_cur = gVisorMax; + constexpr rlim_t maxFD = 4096; + if (rlim.rlim_cur > maxFD) { + rlim.rlim_cur = maxFD; TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0); } diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc new file mode 100644 index 000000000..2324c7f6a --- /dev/null +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -0,0 +1,171 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Gt; + +PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast( + reinterpret_cast(&addr)->sin_port); + case AF_INET6: + return static_cast( + reinterpret_cast(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + reinterpret_cast(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +struct TestParam { + TestAddress listener; + TestAddress connector; +}; + +std::string DescribeTestParam(::testing::TestParamInfo const& info) { + return absl::StrCat("Listen", info.param.listener.description, "_Connect", + info.param.connector.description); +} + +using SocketInetLoopbackTest = ::testing::TestWithParam; + +// This test verifies that connect returns EADDRNOTAVAIL if all local ephemeral +// ports are already in use for a given destination ip/port. +// We disable S/R because this test creates a large number of sockets. +TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 10; + constexpr int kClients = 65536; + + // Create the listening socket. + auto listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + + // Now we keep opening connections till we run out of local ephemeral ports. + // and assert the error we get back. + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector clients; + std::vector servers; + + for (int i = 0; i < kClients; i++) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len); + if (ret == 0) { + clients.push_back(std::move(client)); + FileDescriptor server = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + servers.push_back(std::move(server)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRNOTAVAIL)); + break; + } +} + +INSTANTIATE_TEST_SUITE_P( + All, SocketInetLoopbackTest, + ::testing::Values( + // Listeners bound to IPv4 addresses refuse connections using IPv6 + // addresses. + TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()}, + TestParam{V4Any(), V4MappedAny()}, + TestParam{V4Any(), V4MappedLoopback()}, + TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()}, + TestParam{V4Loopback(), V4MappedLoopback()}, + TestParam{V4MappedAny(), V4Any()}, + TestParam{V4MappedAny(), V4Loopback()}, + TestParam{V4MappedAny(), V4MappedAny()}, + TestParam{V4MappedAny(), V4MappedLoopback()}, + TestParam{V4MappedLoopback(), V4Any()}, + TestParam{V4MappedLoopback(), V4Loopback()}, + TestParam{V4MappedLoopback(), V4MappedLoopback()}, + + // Listeners bound to IN6ADDR_ANY accept all connections. + TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()}, + TestParam{V6Any(), V4MappedAny()}, + TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()}, + TestParam{V6Any(), V6Loopback()}, + + // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4 + // addresses. + TestParam{V6Loopback(), V6Any()}, + TestParam{V6Loopback(), V6Loopback()}), + DescribeTestParam); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index bc4b07a62..1294d9050 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -2129,6 +2129,39 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { SyscallSucceedsWithValue(kMessageSize)); } +// Check that connect returns EADDRNOTAVAIL when out of local ephemeral ports. +// We disable S/R because this test creates a large number of sockets. +TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + constexpr int kClients = 65536; + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(receiver1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(receiver1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + std::vector> sockets; + for (int i = 0; i < kClients; i++) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int ret = connect(s->get(), reinterpret_cast(&addr.addr), + addr.addr_len); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + // Test that socket will receive packet info control message. TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.