Fix error code returned due to Port exhaustion.

For TCP sockets gVisor incorrectly returns EAGAIN when no ephemeral ports are
available to bind during a connect. Linux returns EADDRNOTAVAIL. This change
fixes gVisor to return the correct code and adds a test for the same.

This change also fixes a minor bug for ping sockets where connect() would fail
with EINVAL unless the socket was bound first.

Also added tests for testing UDP Port exhaustion and Ping socket port
exhaustion.

PiperOrigin-RevId: 314988525
This commit is contained in:
Bhasker Hariharan 2020-06-05 13:41:19 -07:00 committed by gVisor bot
parent 45bf7492ef
commit 526df4f52a
9 changed files with 360 additions and 3 deletions

View File

@ -50,5 +50,6 @@ go_library(
"//pkg/tcpip/transport/udp",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
],
)

View File

@ -33,6 +33,7 @@ import (
"syscall"
"time"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/binary"
@ -719,6 +720,14 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
defer s.EventUnregister(&e)
if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting {
if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM {
// TCP unlike UDP returns EADDRNOTAVAIL when it can't
// find an available local ephemeral port.
if err == tcpip.ErrNoPortAvailable {
return syserr.ErrAddressNotAvailable
}
}
return syserr.TranslateNetstackError(err)
}

View File

@ -506,6 +506,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
nicID := addr.NIC
localPort := uint16(0)
switch e.state {
case stateInitial:
case stateBound, stateConnected:
localPort = e.ID.LocalPort
if e.BindNICID == 0 {

View File

@ -400,6 +400,14 @@ syscall_test(
vfs2 = "True",
)
# Registers the ping socket syscall test binary as a medium-sized test.
syscall_test(
size = "medium",
# Takes too long under gotsan to run.
tags = ["nogotsan"],
test = "//test/syscalls/linux:ping_socket_test",
vfs2 = "True",
)
syscall_test(
size = "large",
add_overlay = True,
@ -697,6 +705,14 @@ syscall_test(
test = "//test/syscalls/linux:socket_inet_loopback_test",
)
# Registers the loopback tests that are excluded from gotsan runs; the target is
# large and split into 50 shards.
syscall_test(
size = "large",
shard_count = 50,
# Takes too long for TSAN. Creates a lot of TCP sockets.
tags = ["nogotsan"],
test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test",
)
syscall_test(
size = "large",
shard_count = 50,

View File

@ -1411,6 +1411,21 @@ cc_binary(
],
)
# Test-only, statically linked binary built from ping_socket.cc.
cc_binary(
name = "ping_socket_test",
testonly = 1,
srcs = ["ping_socket.cc"],
linkstatic = 1,
deps = [
":socket_test_util",
"//test/util:file_descriptor",
gtest,
"//test/util:save_util",
"//test/util:test_main",
"//test/util:test_util",
],
)
cc_binary(
name = "pipe_test",
testonly = 1,
@ -2780,6 +2795,26 @@ cc_binary(
],
)
# Test-only, statically linked binary built from
# socket_inet_loopback_nogotsan.cc.
cc_binary(
name = "socket_inet_loopback_nogotsan_test",
testonly = 1,
srcs = ["socket_inet_loopback_nogotsan.cc"],
linkstatic = 1,
deps = [
":ip_socket_test_util",
":socket_test_util",
"//test/util:file_descriptor",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
gtest,
"//test/util:posix_error",
"//test/util:save_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
],
)
cc_binary(
name = "socket_netlink_test",
testonly = 1,

View File

@ -0,0 +1,91 @@
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
#include "gtest/gtest.h"
#include "test/syscalls/linux/socket_test_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/save_util.h"
#include "test/util/test_util.h"
namespace gvisor {
namespace testing {
namespace {
// Test fixture for ping socket tests. It carries the destination address that
// the tests connect to.
class PingSocket : public ::testing::Test {
 protected:
  // Runs before each test: checks that the probe socket can be created
  // (skipping the test on EPERM) and initializes addr_.
  void SetUp() override;

  // Runs after each test.
  void TearDown() override;

  // Destination address for the tests; SetUp() points it at the IPv4 loopback
  // address.
  sockaddr_in addr_;
};
// Skips the test when ping sockets cannot be created on this host and
// initializes addr_ to the IPv4 loopback address.
void PingSocket::SetUp() {
  // On some hosts ping sockets are restricted to specific groups using the
  // sysctl "ping_group_range". Probe with the same socket type the tests use
  // (SOCK_DGRAM + IPPROTO_ICMP): a SOCK_RAW probe would depend on CAP_NET_RAW
  // instead, which says nothing about whether ping sockets are permitted.
  const int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
  if (s < 0) {
    // Creation denied by policy: nothing to test on this host.
    if (errno == EPERM || errno == EACCES) {
      GTEST_SKIP();
    }
  } else {
    // The probe socket is not needed by the tests; release it right away.
    close(s);
  }

  addr_ = {};
  // Just a random port as the destination port number is irrelevant for ping
  // sockets.
  addr_.sin_port = 12345;
  addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr_.sin_family = AF_INET;
}
// Intentionally empty: this override performs no per-test cleanup.
void PingSocket::TearDown() {}
// Connects ping sockets to the loopback address until connect() fails, and
// checks that the failure is EAGAIN.
//
// Both random and cooperative save/restore are disabled here because the test
// issues a very large number of syscalls.
TEST_F(PingSocket, ICMPPortExhaustion_NoRandomSave) {
  DisableSave ds;

  constexpr int kSockets = 65536;
  addr_.sin_port = 0;

  // Successfully connected sockets are kept open so they keep holding their
  // local identifiers until the test ends.
  std::vector<FileDescriptor> sockets;
  for (int i = 0; i < kSockets; ++i) {
    auto s =
        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
    const int rc = connect(s.get(), reinterpret_cast<struct sockaddr*>(&addr_),
                           sizeof(addr_));
    if (rc != 0) {
      // First failure: verify the errno and stop.
      ASSERT_THAT(rc, SyscallFailsWithErrno(EAGAIN));
      break;
    }
    sockets.push_back(std::move(s));
  }
}
} // namespace
} // namespace testing
} // namespace gvisor

View File

@ -259,9 +259,9 @@ TEST_F(PollTest, Nfds) {
TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
// gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE.
constexpr rlim_t gVisorMax = 1048576;
if (rlim.rlim_cur > gVisorMax) {
rlim.rlim_cur = gVisorMax;
constexpr rlim_t maxFD = 4096;
if (rlim.rlim_cur > maxFD) {
rlim.rlim_cur = maxFD;
TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
}

View File

@ -0,0 +1,171 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "test/syscalls/linux/ip_socket_test_util.h"
#include "test/syscalls/linux/socket_test_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
#include "test/util/save_util.h"
#include "test/util/test_util.h"
namespace gvisor {
namespace testing {
namespace {
using ::testing::Gt;
// Reads the port field out of `addr`, interpreting it according to `family`.
// The value is returned exactly as stored; no byte-order conversion is done.
// Returns EINVAL for any family other than AF_INET or AF_INET6.
PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
  if (family == AF_INET) {
    auto const* v4 = reinterpret_cast<sockaddr_in const*>(&addr);
    return static_cast<uint16_t>(v4->sin_port);
  }
  if (family == AF_INET6) {
    auto const* v6 = reinterpret_cast<sockaddr_in6 const*>(&addr);
    return static_cast<uint16_t>(v6->sin6_port);
  }
  return PosixError(EINVAL, absl::StrCat("unknown socket family: ", family));
}
// Stores `port` into the port field of `*addr`, interpreting the storage
// according to `family`. The value is written as given; no byte-order
// conversion is done. Returns EINVAL for any family other than AF_INET or
// AF_INET6, leaving `*addr` untouched.
PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
  if (family == AF_INET) {
    auto* v4 = reinterpret_cast<sockaddr_in*>(addr);
    v4->sin_port = port;
    return NoError();
  }
  if (family == AF_INET6) {
    auto* v6 = reinterpret_cast<sockaddr_in6*>(addr);
    v6->sin6_port = port;
    return NoError();
  }
  return PosixError(EINVAL, absl::StrCat("unknown socket family: ", family));
}
// Address pair describing one parameterized test instance.
struct TestParam {
// Address the listening socket is bound to.
TestAddress listener;
// Address the client sockets connect to; the test overwrites its port with
// the listener's bound port.
TestAddress connector;
};
// Builds the test-name suffix for a parameter:
// "Listen<listener description>_Connect<connector description>".
std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) {
  TestParam const& p = info.param;
  return absl::StrCat("Listen", p.listener.description, "_Connect",
                      p.connector.description);
}
using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>;
// Verifies that a TCP connect() fails with EADDRNOTAVAIL once every local
// ephemeral port is already in use for the given destination ip/port.
// Save/restore is disabled because this test creates a large number of sockets.
TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) {
  constexpr int kBacklog = 10;
  constexpr int kClients = 65536;

  TestParam const& param = GetParam();
  TestAddress const& listener = param.listener;
  TestAddress const& connector = param.connector;

  // Set up the listening socket.
  auto listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
  sockaddr_storage listen_addr = listener.addr;
  auto* const listen_sa = reinterpret_cast<sockaddr*>(&listen_addr);
  ASSERT_THAT(bind(listen_fd.get(), listen_sa, listener.addr_len),
              SyscallSucceeds());
  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());

  // Find out which port the listener ended up bound to.
  socklen_t addrlen = listener.addr_len;
  ASSERT_THAT(getsockname(listen_fd.get(), listen_sa, &addrlen),
              SyscallSucceeds());
  uint16_t const port =
      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));

  // Disable cooperative S/R as we are making too many syscalls.
  DisableSave ds;

  // Point the connector address at the listener's port.
  sockaddr_storage conn_addr = connector.addr;
  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
  auto* const conn_sa = reinterpret_cast<sockaddr*>(&conn_addr);

  // Keep opening connections until we run out of local ephemeral ports, then
  // assert on the error we get back. Both ends of every established
  // connection stay open so their ports remain in use.
  std::vector<FileDescriptor> clients;
  std::vector<FileDescriptor> servers;
  for (int i = 0; i < kClients; ++i) {
    FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
    int const rc = connect(client.get(), conn_sa, connector.addr_len);
    if (rc != 0) {
      // First failure: verify the errno and stop.
      ASSERT_THAT(rc, SyscallFailsWithErrno(EADDRNOTAVAIL));
      break;
    }
    clients.push_back(std::move(client));
    FileDescriptor server =
        ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
    servers.push_back(std::move(server));
  }
}
// Instantiates SocketInetLoopbackTest once per (listener, connector) address
// pair listed below; DescribeTestParam generates the name of each instance.
INSTANTIATE_TEST_SUITE_P(
All, SocketInetLoopbackTest,
::testing::Values(
// Listeners bound to IPv4 addresses refuse connections using IPv6
// addresses.
TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()},
TestParam{V4Any(), V4MappedAny()},
TestParam{V4Any(), V4MappedLoopback()},
TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()},
TestParam{V4Loopback(), V4MappedLoopback()},
TestParam{V4MappedAny(), V4Any()},
TestParam{V4MappedAny(), V4Loopback()},
TestParam{V4MappedAny(), V4MappedAny()},
TestParam{V4MappedAny(), V4MappedLoopback()},
TestParam{V4MappedLoopback(), V4Any()},
TestParam{V4MappedLoopback(), V4Loopback()},
TestParam{V4MappedLoopback(), V4MappedLoopback()},
// Listeners bound to IN6ADDR_ANY accept all connections.
TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()},
TestParam{V6Any(), V4MappedAny()},
TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()},
TestParam{V6Any(), V6Loopback()},
// Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
// addresses.
TestParam{V6Loopback(), V6Any()},
TestParam{V6Loopback(), V6Loopback()}),
DescribeTestParam);
} // namespace
} // namespace testing
} // namespace gvisor

View File

@ -2129,6 +2129,39 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) {
SyscallSucceedsWithValue(kMessageSize));
}
// Check that connect returns EAGAIN when out of local ephemeral ports.
// We disable S/R because this test creates a large number of sockets.
TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) {
  constexpr int kClients = 65536;

  // Bind a receiver to the loopback address and read back the port that was
  // selected for it; `dest` then holds the full destination for the clients.
  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
  auto dest = V4Loopback();
  auto* const dest_sa = reinterpret_cast<sockaddr*>(&dest.addr);
  ASSERT_THAT(bind(receiver->get(), dest_sa, dest.addr_len), SyscallSucceeds());
  socklen_t bound_len = dest.addr_len;
  ASSERT_THAT(getsockname(receiver->get(), dest_sa, &bound_len),
              SyscallSucceeds());
  EXPECT_EQ(bound_len, dest.addr_len);

  // Disable cooperative S/R as we are making too many syscalls.
  DisableSave ds;

  // Connect new sockets to the receiver until connect() fails; connected
  // sockets are kept open so their local ports stay in use.
  std::vector<std::unique_ptr<FileDescriptor>> sockets;
  for (int i = 0; i < kClients; ++i) {
    auto client = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
    const int rc = connect(client->get(), dest_sa, dest.addr_len);
    if (rc != 0) {
      // First failure: verify the errno and stop.
      ASSERT_THAT(rc, SyscallFailsWithErrno(EAGAIN));
      break;
    }
    sockets.push_back(std::move(client));
  }
}
// Test that socket will receive packet info control message.
TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
// TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.