Stub support for TCP_SYNCNT and TCP_WINDOW_CLAMP.

This change adds support for TCP_SYNCNT and TCP_WINDOW_CLAMP options
in GetSockOpt/SetSockOpt. This change does not really change any
behaviour in Netstack and only stores/returns the stored value.

Actual honoring of these options will be added as required.

Fixes #2626, #2625

PiperOrigin-RevId: 311453777
This commit is contained in:
Bhasker Hariharan 2020-05-13 19:47:42 -07:00 committed by gVisor bot
parent 64afaf0e9b
commit 8b8774d715
6 changed files with 367 additions and 4 deletions

View File

@ -1321,6 +1321,29 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(time.Duration(v) / time.Second), nil
case linux.TCP_SYNCNT:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
}
v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption)
if err != nil {
return nil, syserr.TranslateNetstackError(err)
}
return int32(v), nil
case linux.TCP_WINDOW_CLAMP:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
}
v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption)
if err != nil {
return nil, syserr.TranslateNetstackError(err)
}
return int32(v), nil
default:
emitUnimplementedEventTCP(t, name)
}
@ -1790,6 +1813,22 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))))
case linux.TCP_SYNCNT:
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
v := usermem.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v)))
case linux.TCP_WINDOW_CLAMP:
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
v := usermem.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v)))
case linux.TCP_REPAIR_OPTIONS:
t.Kernel().EmitUnimplementedEvent(t)

View File

@ -622,6 +622,19 @@ const (
//
// A zero value indicates the default.
TTLOption
// TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of
// SYN retransmits that TCP should send before aborting the attempt to
// connect. It cannot exceed 255.
//
// NOTE: This option is currently only stubbed out and is a no-op.
TCPSynCountOption
// TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size
// of the advertised window to this value.
//
// NOTE: This option is currently only stubbed out and is a no-op.
TCPWindowClampOption
)
// ErrorOption is used in GetSockOpt to specify that the last error reported by
@ -690,6 +703,10 @@ type TCPMinRTOOption time.Duration
// switches to using SYN cookies.
type TCPSynRcvdCountThresholdOption uint64
// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
// default for number of times SYN is retransmitted before aborting a connect.
type TCPSynRetriesOption uint8
// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast.
type MulticastInterfaceOption struct {

View File

@ -470,6 +470,17 @@ type endpoint struct {
// for this endpoint using the TCP_MAXSEG setsockopt.
userMSS uint16
// maxSynRetries is the maximum number of SYN retransmits that TCP should
// send before aborting the attempt to connect. It cannot exceed 255.
//
// NOTE: This is currently a no-op and does not change the SYN
// retransmissions.
maxSynRetries uint8
// windowClamp is used to bound the size of the advertised window to
// this value.
windowClamp uint32
// The following fields are used to manage the send buffer. When
// segments are ready to be sent, they are added to sndQueue and the
// protocol goroutine is signaled via sndWaker.
@ -795,8 +806,10 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
interval: 75 * time.Second,
count: 9,
},
uniqueID: s.UniqueID(),
txHash: s.Rand().Uint32(),
uniqueID: s.UniqueID(),
txHash: s.Rand().Uint32(),
windowClamp: DefaultReceiveBufferSize,
maxSynRetries: DefaultSynRetries,
}
var ss SendBufferSizeOption
@ -829,6 +842,11 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.tcpLingerTimeout = time.Duration(tcpLT)
}
var synRetries tcpip.TCPSynRetriesOption
if err := s.TransportProtocolOption(ProtocolNumber, &synRetries); err == nil {
e.maxSynRetries = uint8(synRetries)
}
if p := s.GetTCPProbe(); p != nil {
e.probe = p
}
@ -1603,6 +1621,36 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.ttl = uint8(v)
e.UnlockUser()
case tcpip.TCPSynCountOption:
if v < 1 || v > 255 {
return tcpip.ErrInvalidOptionValue
}
e.LockUser()
e.maxSynRetries = uint8(v)
e.UnlockUser()
case tcpip.TCPWindowClampOption:
if v == 0 {
e.LockUser()
switch e.EndpointState() {
case StateClose, StateInitial:
e.windowClamp = 0
e.UnlockUser()
return nil
default:
e.UnlockUser()
return tcpip.ErrInvalidOptionValue
}
}
var rs ReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
if v < rs.Min/2 {
v = rs.Min / 2
}
}
e.LockUser()
e.windowClamp = uint32(v)
e.UnlockUser()
}
return nil
}
@ -1826,6 +1874,18 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.UnlockUser()
return v, nil
case tcpip.TCPSynCountOption:
e.LockUser()
v := int(e.maxSynRetries)
e.UnlockUser()
return v, nil
case tcpip.TCPWindowClampOption:
e.LockUser()
v := int(e.windowClamp)
e.UnlockUser()
return v, nil
default:
return -1, tcpip.ErrUnknownProtocolOption
}

View File

@ -64,6 +64,10 @@ const (
// DefaultTCPTimeWaitTimeout is the amount of time that sockets linger
// in TIME_WAIT state before being marked closed.
DefaultTCPTimeWaitTimeout = 60 * time.Second
// DefaultSynRetries is the default value for the number of SYN retransmits
// before a connect is aborted.
DefaultSynRetries = 6
)
// SACKEnabled option can be used to enable SACK support in the TCP
@ -164,6 +168,7 @@ type protocol struct {
tcpTimeWaitTimeout time.Duration
minRTO time.Duration
synRcvdCount synRcvdCounter
synRetries uint8
dispatcher *dispatcher
}
@ -346,6 +351,15 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
case tcpip.TCPSynRetriesOption:
if v < 1 || v > 255 {
return tcpip.ErrInvalidOptionValue
}
p.mu.Lock()
p.synRetries = uint8(v)
p.mu.Unlock()
return nil
default:
return tcpip.ErrUnknownProtocolOption
}
@ -420,6 +434,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
p.mu.RUnlock()
return nil
case *tcpip.TCPSynRetriesOption:
p.mu.RLock()
*v = tcpip.TCPSynRetriesOption(p.synRetries)
p.mu.RUnlock()
return nil
default:
return tcpip.ErrUnknownProtocolOption
}
@ -452,6 +472,7 @@ func NewProtocol() stack.TransportProtocol {
tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout,
synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold},
dispatcher: newDispatcher(runtime.GOMAXPROCS(0)),
synRetries: DefaultSynRetries,
minRTO: MinRTO,
}
}

View File

@ -876,6 +876,51 @@ TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) {
EXPECT_EQ(get, kAbove);
}
// Checks TCP_WINDOW_CLAMP on a connected socket pair: a clamp of zero must
// fail with EINVAL, and a non-zero clamp smaller than half of the minimum
// SO_RCVBUF must be reported back as exactly half of the minimum SO_RCVBUF.
TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) {
  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());

  // Request a receive buffer of zero bytes; the value read back afterwards is
  // treated as the minimum SO_RCVBUF for the socket.
  constexpr int kNoBytes = 0;
  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &kNoBytes,
                         sizeof(kNoBytes)),
              SyscallSucceeds());

  int rcvbuf = -1;
  socklen_t rcvbuf_len = sizeof(rcvbuf);
  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &rcvbuf,
                         &rcvbuf_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(rcvbuf_len, sizeof(rcvbuf));
  const int half_min_rcvbuf = rcvbuf / 2;

  // A zero clamp is not permitted once the socket is connected.
  constexpr int kZeroClamp = 0;
  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
                         &kZeroClamp, sizeof(kZeroClamp)),
              SyscallFailsWithErrno(EINVAL));

  // One byte under half of the minimum receive buffer: the set succeeds but
  // the stored clamp is expected to be bumped up to half of the minimum.
  const int requested_clamp = half_min_rcvbuf - 1;
  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
                         &requested_clamp, sizeof(requested_clamp)),
              SyscallSucceeds());

  int clamp = -1;
  socklen_t clamp_len = sizeof(clamp);
  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
                         &clamp, &clamp_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(clamp_len, sizeof(clamp));
  EXPECT_EQ(half_min_rcvbuf, clamp);
}
TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) {
DisableSave ds; // Too many syscalls.
constexpr int kThreadCount = 1000;

View File

@ -1313,7 +1313,7 @@ TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) {
int get = -1;
socklen_t get_len = sizeof(get);
ASSERT_THAT(
getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
SyscallSucceedsWithValue(0));
EXPECT_EQ(get_len, sizeof(get));
EXPECT_EQ(get, 0);
@ -1326,7 +1326,7 @@ TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) {
int get = -1;
socklen_t get_len = sizeof(get);
ASSERT_THAT(
getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
SyscallSucceedsWithValue(0));
EXPECT_EQ(get_len, sizeof(get));
EXPECT_EQ(get, 0);
@ -1378,6 +1378,187 @@ TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) {
SyscallSucceedsWithValue(0));
}
// Checks that TCP_SYNCNT values below one (zero and a negative number) are
// rejected with EINVAL and that the value reported by getsockopt is the same
// before and after the failed attempts.
TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  // Remember what the socket reports before any set is attempted.
  int initial_syn_cnt = -1;
  socklen_t initial_len = sizeof(initial_syn_cnt);
  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &initial_syn_cnt,
                         &initial_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(initial_len, sizeof(initial_syn_cnt));

  // Zero is below the smallest accepted value.
  constexpr int kZeroSynCnt = 0;
  EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZeroSynCnt,
                         sizeof(kZeroSynCnt)),
              SyscallFailsWithErrno(EINVAL));

  // Negative values are rejected in the same way.
  constexpr int kNegativeSynCnt = -1;
  EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNegativeSynCnt,
                         sizeof(kNegativeSynCnt)),
              SyscallFailsWithErrno(EINVAL));

  // The failed sets must not have modified the stored value.
  int current_syn_cnt = -1;
  socklen_t current_len = sizeof(current_syn_cnt);
  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &current_syn_cnt,
                         &current_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(current_len, sizeof(current_syn_cnt));
  EXPECT_EQ(initial_syn_cnt, current_syn_cnt);
}
// Checks that a freshly created TCP socket reports a TCP_SYNCNT of 6.
TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  constexpr int kDefaultSynCnt = 6;

  int syn_cnt = -1;
  socklen_t syn_cnt_len = sizeof(syn_cnt);
  ASSERT_THAT(
      getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &syn_cnt, &syn_cnt_len),
      SyscallSucceedsWithValue(0));
  EXPECT_EQ(syn_cnt_len, sizeof(syn_cnt));
  EXPECT_EQ(syn_cnt, kDefaultSynCnt);
}
// Checks that a TCP_SYNCNT value of 20 is accepted and read back unchanged.
TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  constexpr int kTCPSynCnt = 20;
  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
                         sizeof(kTCPSynCnt)),
              SyscallSucceeds());

  // Read the option back and compare it with what was written.
  int syn_cnt = -1;
  socklen_t syn_cnt_len = sizeof(syn_cnt);
  ASSERT_THAT(
      getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &syn_cnt, &syn_cnt_len),
      SyscallSucceeds());
  EXPECT_EQ(syn_cnt_len, sizeof(syn_cnt));
  EXPECT_EQ(syn_cnt, kTCPSynCnt);
}
// Checks that a TCP_SYNCNT value of 256 is rejected with EINVAL and that the
// value reported by getsockopt is the same before and after the attempt.
TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  // Remember what the socket reports before any set is attempted.
  int initial_syn_cnt = -1;
  socklen_t initial_len = sizeof(initial_syn_cnt);
  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &initial_syn_cnt,
                         &initial_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(initial_len, sizeof(initial_syn_cnt));

  // 256 is one more than the largest accepted value.
  constexpr int kTooLargeSynCnt = 256;
  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTooLargeSynCnt,
                         sizeof(kTooLargeSynCnt)),
              SyscallFailsWithErrno(EINVAL));

  // The failed set must not have modified the stored value.
  int current_syn_cnt = -1;
  socklen_t current_len = sizeof(current_syn_cnt);
  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &current_syn_cnt,
                         &current_len),
              SyscallSucceeds());
  EXPECT_EQ(current_len, sizeof(current_syn_cnt));
  EXPECT_EQ(current_syn_cnt, initial_syn_cnt);
}
// Checks that, on an unconnected socket, a TCP_WINDOW_CLAMP smaller than half
// of the minimum SO_RCVBUF is accepted but reported back as exactly half of
// the minimum SO_RCVBUF.
TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  // Request a receive buffer of zero bytes; the value read back afterwards is
  // treated as the minimum SO_RCVBUF for the socket.
  constexpr int kNoBytes = 0;
  EXPECT_THAT(
      setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kNoBytes, sizeof(kNoBytes)),
      SyscallSucceeds());

  int rcvbuf = -1;
  socklen_t rcvbuf_len = sizeof(rcvbuf);
  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &rcvbuf_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(rcvbuf_len, sizeof(rcvbuf));
  const int half_min_rcvbuf = rcvbuf / 2;

  // Ask for a clamp one byte under half of the minimum receive buffer.
  const int requested_clamp = half_min_rcvbuf - 1;
  EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
                         &requested_clamp, sizeof(requested_clamp)),
              SyscallSucceeds());

  // The stored clamp is expected to have been raised to half of the minimum.
  int clamp = -1;
  socklen_t clamp_len = sizeof(clamp);
  ASSERT_THAT(
      getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &clamp, &clamp_len),
      SyscallSucceedsWithValue(0));
  EXPECT_EQ(clamp_len, sizeof(clamp));
  EXPECT_EQ(half_min_rcvbuf, clamp);
}
// Checks that a TCP_WINDOW_CLAMP of zero is accepted on a socket that has not
// been connected, and that zero is what getsockopt subsequently reports.
TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  constexpr int kZero = 0;
  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero,
                         sizeof(kZero)),
              SyscallSucceeds());

  // Read the option back and compare it with what was written.
  int clamp = -1;
  socklen_t clamp_len = sizeof(clamp);
  ASSERT_THAT(
      getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &clamp, &clamp_len),
      SyscallSucceeds());
  EXPECT_EQ(clamp_len, sizeof(clamp));
  EXPECT_EQ(clamp, kZero);
}
// Checks that a TCP_WINDOW_CLAMP one byte above half of the minimum SO_RCVBUF
// is accepted and read back unchanged.
TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
  FileDescriptor s =
      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));

  // Request a receive buffer of zero bytes; the value read back afterwards is
  // treated as the minimum SO_RCVBUF for the socket.
  constexpr int kNoBytes = 0;
  EXPECT_THAT(
      setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kNoBytes, sizeof(kNoBytes)),
      SyscallSucceeds());

  int rcvbuf = -1;
  socklen_t rcvbuf_len = sizeof(rcvbuf);
  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &rcvbuf_len),
              SyscallSucceedsWithValue(0));
  EXPECT_EQ(rcvbuf_len, sizeof(rcvbuf));

  // Ask for a clamp one byte over half of the minimum receive buffer.
  const int requested_clamp = rcvbuf / 2 + 1;
  EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
                         &requested_clamp, sizeof(requested_clamp)),
              SyscallSucceeds());

  // No adjustment is expected for a value above the lower bound.
  int clamp = -1;
  socklen_t clamp_len = sizeof(clamp);
  ASSERT_THAT(
      getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &clamp, &clamp_len),
      SyscallSucceedsWithValue(0));
  EXPECT_EQ(clamp_len, sizeof(clamp));
  EXPECT_EQ(requested_clamp, clamp);
}
// Instantiates the SimpleTcpSocketTest suite under the name AllInetTests with
// two parameter values, AF_INET and AF_INET6; the tests read the value through
// GetParam() and pass it to Socket() as the address family.
INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
::testing::Values(AF_INET, AF_INET6));