gvisor/pkg/tcpip/transport/tcp/tcp_test.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tcp_test
import (
"bytes"
"fmt"
"math"
"testing"
"time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/checker"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/ports"
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
"gvisor.dev/gvisor/pkg/waiter"
)
const (
// defaultMTU is the MTU, in bytes, used throughout the tests, except
// where another value is explicitly used. It is chosen to match the MTU
// of loopback interfaces on Linux systems.
defaultMTU = 65535
// defaultIPv4MSS is the MSS sent by the network stack in SYN/SYN-ACK for an
// IPv4 endpoint when the MTU is set to defaultMTU in the test.
defaultIPv4MSS = defaultMTU - header.IPv4MinimumSize - header.TCPMinimumSize
)
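// TestGiveUpConnect checks that closing an endpoint while a connection
// attempt is still in progress aborts the handshake and that the endpoint
// subsequently reports ErrAborted.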
func TestGiveUpConnect(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
var wq waiter.Queue
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
// Register for notification, then start connection attempt.
waitEntry, notifyCh := waiter.NewChannelEntry(nil)
wq.EventRegister(&waitEntry, waiter.EventOut)
defer wq.EventUnregister(&waitEntry)
if err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("got ep.Connect(...) = %v, want = %v", err, tcpip.ErrConnectStarted)
}
// Close the connection, wait for completion.
ep.Close()
// Wait for ep to become writable.
<-notifyCh
if err := ep.GetSockOpt(tcpip.ErrorOption{}); err != tcpip.ErrAborted {
t.Fatalf("got ep.GetSockOpt(tcpip.ErrorOption{}) = %v, want = %v", err, tcpip.ErrAborted)
}
}
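// TestConnectIncrementActiveConnection checks that an active connect
// increments the stack's ActiveConnectionOpenings stat.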
func TestConnectIncrementActiveConnection(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
want := stats.TCP.ActiveConnectionOpenings.Value() + 1
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
if got := stats.TCP.ActiveConnectionOpenings.Value(); got != want {
t.Errorf("got stats.TCP.ActtiveConnectionOpenings.Value() = %v, want = %v", got, want)
}
}
func TestConnectDoesNotIncrementFailedConnectionAttempts(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
want := stats.TCP.FailedConnectionAttempts.Value()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
if got := stats.TCP.FailedConnectionAttempts.Value(); got != want {
t.Errorf("got stats.TCP.FailedConnectionAttempts.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).FailedConnectionAttempts.Value(); got != want {
t.Errorf("got EP stats.FailedConnectionAttempts = %v, want = %v", got, want)
}
}
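// TestActiveFailedConnectionAttemptIncrement checks that a connect that fails
// (here with ErrNoRoute, due to a non-existent NIC) increments the
// FailedConnectionAttempts stat on both the stack and the endpoint.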
func TestActiveFailedConnectionAttemptIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
c.EP = ep
want := stats.TCP.FailedConnectionAttempts.Value() + 1
if err := c.EP.Connect(tcpip.FullAddress{NIC: 2, Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrNoRoute {
t.Errorf("got c.EP.Connect(...) = %v, want = %v", err, tcpip.ErrNoRoute)
}
if got := stats.TCP.FailedConnectionAttempts.Value(); got != want {
t.Errorf("got stats.TCP.FailedConnectionAttempts.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).FailedConnectionAttempts.Value(); got != want {
t.Errorf("got EP stats FailedConnectionAttempts = %v, want = %v", got, want)
}
}
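// TestTCPSegmentsSentIncrement checks that the SYN and the final ACK of an
// active handshake are counted in the SegmentsSent stats.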
func TestTCPSegmentsSentIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
// SYN and ACK
want := stats.TCP.SegmentsSent.Value() + 2
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
if got := stats.TCP.SegmentsSent.Value(); got != want {
t.Errorf("got stats.TCP.SegmentsSent.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).SegmentsSent.Value(); got != want {
t.Errorf("got EP stats SegmentsSent.Value() = %v, want = %v", got, want)
}
}
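// TestTCPResetsSentIncrement checks that an ACK carrying an invalid
// acknowledgement number elicits a RST from a listening endpoint and that the
// RST is counted in the ResetsSent stat.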
func TestTCPResetsSentIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
want := stats.TCP.ResetsSent.Value() + 1
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Send a SYN request.
iss := seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: iss,
})
// Receive the SYN-ACK reply.
b := c.GetPacket()
tcpHdr := header.TCP(header.IPv4(b).Payload())
c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
ackHeaders := &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagAck,
SeqNum: iss + 1,
// An AckNum that does not acknowledge exactly the next expected sequence
// number (IRS + 1) causes a RST segment to be sent in response.
AckNum: c.IRS + 2,
}
// Send ACK.
c.SendPacket(nil, ackHeaders)
c.GetPacket()
if got := stats.TCP.ResetsSent.Value(); got != want {
t.Errorf("got stats.TCP.ResetsSent.Value() = %v, want = %v", got, want)
}
}
// TestTCPResetSentForACKWhenNotUsingSynCookies checks that the stack generates
// a RST if an ACK is received on the listening socket for which there is no
// active handshake in progress and we are not using SYN cookies.
func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Send a SYN request.
iss := seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: iss,
})
// Receive the SYN-ACK reply.
b := c.GetPacket()
tcpHdr := header.TCP(header.IPv4(b).Payload())
c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
ackHeaders := &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagAck,
SeqNum: iss + 1,
AckNum: c.IRS + 1,
}
// Send ACK.
c.SendPacket(nil, ackHeaders)
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
c.EP, _, err = ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
c.EP, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
c.EP.Close()
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS+1)),
checker.AckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck)))
finHeaders := &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: iss + 1,
AckNum: c.IRS + 2,
}
c.SendPacket(nil, finHeaders)
// Get the ACK to the FIN we just sent.
c.GetPacket()
// Now resend the same ACK. This ACK should generate a RST, since there
// should no longer be an endpoint in SYN-RCVD state and we are not using
// SYN cookies yet. We resend the same ACK because we need a valid cookie
// (IRS) generated by netstack, without which the ACK would be rejected.
c.SendPacket(nil, ackHeaders)
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS+1)),
checker.AckNum(uint32(iss)+1),
checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck)))
}
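// TestTCPResetsReceivedIncrement checks that a RST received on an established
// connection increments the ResetsReceived stat.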
func TestTCPResetsReceivedIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
want := stats.TCP.ResetsReceived.Value() + 1
iss := seqnum.Value(789)
rcvWnd := seqnum.Size(30000)
c.CreateConnected(iss, rcvWnd, -1 /* epRcvBuf */)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
SeqNum: iss.Add(1),
AckNum: c.IRS.Add(1),
RcvWnd: rcvWnd,
Flags: header.TCPFlagRst,
})
if got := stats.TCP.ResetsReceived.Value(); got != want {
t.Errorf("got stats.TCP.ResetsReceived.Value() = %v, want = %v", got, want)
}
}
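// TestTCPResetsDoNotGenerateResets checks that an incoming RST is counted but
// does not cause the stack to send a RST of its own.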
func TestTCPResetsDoNotGenerateResets(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
want := stats.TCP.ResetsReceived.Value() + 1
iss := seqnum.Value(789)
rcvWnd := seqnum.Size(30000)
c.CreateConnected(iss, rcvWnd, -1 /* epRcvBuf */)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
SeqNum: iss.Add(1),
AckNum: c.IRS.Add(1),
RcvWnd: rcvWnd,
Flags: header.TCPFlagRst,
})
if got := stats.TCP.ResetsReceived.Value(); got != want {
t.Errorf("got stats.TCP.ResetsReceived.Value() = %v, want = %v", got, want)
}
c.CheckNoPacketTimeout("got an unexpected packet", 100*time.Millisecond)
}
func TestActiveHandshake(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
}
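// TestNonBlockingClose checks that closing an established endpoint returns
// promptly rather than blocking on connection teardown.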
func TestNonBlockingClose(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
ep := c.EP
c.EP = nil
// Close the endpoint and measure how long it takes.
t0 := time.Now()
ep.Close()
if diff := time.Now().Sub(t0); diff > 3*time.Second {
t.Fatalf("Took too long to close: %v", diff)
}
}
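// TestConnectResetAfterClose checks that if the peer acknowledges our FIN but
// never sends its own, the endpoint eventually gives up waiting and sends a
// RST.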
func TestConnectResetAfterClose(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
ep := c.EP
c.EP = nil
// Close the endpoint, make sure we get a FIN segment, then acknowledge
// to complete closure of sender, but don't send our own FIN.
ep.Close()
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for the ep to give up waiting for a FIN, and send a RST.
time.Sleep(3 * time.Second)
for {
b := c.GetPacket()
tcpHdr := header.TCP(header.IPv4(b).Payload())
if tcpHdr.Flags() == header.TCPFlagAck|header.TCPFlagFin {
// This is a retransmit of the FIN, ignore it.
continue
}
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst),
),
)
break
}
}
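// TestSimpleReceive checks that in-order data sent by the peer becomes
// readable on the endpoint and is acknowledged.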
func TestSimpleReceive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
data := []byte{1, 2, 3}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Receive data.
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
if !bytes.Equal(data, v) {
t.Fatalf("got data = %v, want = %v", v, data)
}
// Check that ACK is received.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
// TestUserSuppliedMSSOnConnectV4 tests that the user supplied MSS is used when
// creating a new active IPv4 TCP socket. It should be present in the sent TCP
// SYN segment.
func TestUserSuppliedMSSOnConnectV4(t *testing.T) {
const mtu = 5000
const maxMSS = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
tests := []struct {
name string
setMSS uint16
expMSS uint16
}{
{
"EqualToMaxMSS",
maxMSS,
maxMSS,
},
{
"LessThanMTU",
maxMSS - 1,
maxMSS - 1,
},
{
"GreaterThanMTU",
maxMSS + 1,
maxMSS,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := context.New(t, mtu)
defer c.Cleanup()
c.Create(-1)
// Set the MSS socket option.
opt := tcpip.MaxSegOption(test.setMSS)
if err := c.EP.SetSockOpt(opt); err != nil {
t.Fatalf("SetSockOpt(%#v) failed: %s", opt, err)
}
// Get expected window size.
rcvBufSize, err := c.EP.GetSockOptInt(tcpip.ReceiveBufferSizeOption)
if err != nil {
t.Fatalf("GetSockOpt(%v) failed: %s", tcpip.ReceiveBufferSizeOption, err)
}
ws := tcp.FindWndScale(seqnum.Size(rcvBufSize))
// Start connection attempt to IPv4 address.
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("Unexpected return value from Connect: %v", err)
}
// Receive SYN packet with our user supplied MSS.
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
checker.TCPSynOptions(header.TCPSynOptions{MSS: test.expMSS, WS: ws})))
})
}
}
// TestUserSuppliedMSSOnConnectV6 tests that the user supplied MSS is used when
// creating a new active IPv6 TCP socket. It should be present in the sent TCP
// SYN segment.
func TestUserSuppliedMSSOnConnectV6(t *testing.T) {
const mtu = 5000
const maxMSS = mtu - header.IPv6MinimumSize - header.TCPMinimumSize
tests := []struct {
name string
setMSS uint16
expMSS uint16
}{
{
"EqualToMaxMSS",
maxMSS,
maxMSS,
},
{
"LessThanMTU",
maxMSS - 1,
maxMSS - 1,
},
{
"GreaterThanMTU",
maxMSS + 1,
maxMSS,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := context.New(t, mtu)
defer c.Cleanup()
c.CreateV6Endpoint(true)
// Set the MSS socket option.
opt := tcpip.MaxSegOption(test.setMSS)
if err := c.EP.SetSockOpt(opt); err != nil {
t.Fatalf("SetSockOpt(%#v) failed: %s", opt, err)
}
// Get expected window size.
rcvBufSize, err := c.EP.GetSockOptInt(tcpip.ReceiveBufferSizeOption)
if err != nil {
t.Fatalf("GetSockOpt(%v) failed: %s", tcpip.ReceiveBufferSizeOption, err)
}
ws := tcp.FindWndScale(seqnum.Size(rcvBufSize))
// Start connection attempt to IPv6 address.
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestV6Addr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("Unexpected return value from Connect: %v", err)
}
// Receive SYN packet with our user supplied MSS.
checker.IPv6(t, c.GetV6Packet(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
checker.TCPSynOptions(header.TCPSynOptions{MSS: test.expMSS, WS: ws})))
})
}
}
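// TestSendRstOnListenerRxSynAckV4 checks that a listening IPv4 endpoint
// responds to an unexpected SYN-ACK with a RST whose sequence number matches
// the acknowledgement number of the offending segment.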
func TestSendRstOnListenerRxSynAckV4(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.Create(-1)
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatal("Bind failed:", err)
}
if err := c.EP.Listen(10); err != nil {
t.Fatal("Listen failed:", err)
}
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn | header.TCPFlagAck,
SeqNum: 100,
AckNum: 200,
})
checker.IPv4(t, c.GetPacket(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
checker.SeqNum(200)))
}
func TestSendRstOnListenerRxSynAckV6(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateV6Endpoint(true)
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatal("Bind failed:", err)
}
if err := c.EP.Listen(10); err != nil {
t.Fatal("Listen failed:", err)
}
c.SendV6Packet(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn | header.TCPFlagAck,
SeqNum: 100,
AckNum: 200,
})
checker.IPv6(t, c.GetV6Packet(), checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagRst),
checker.SeqNum(200)))
}
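// TestTOSV4 checks that the IPv4 TOS socket option can be set and read back,
// and that outgoing packets carry the configured TOS value.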
func TestTOSV4(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %s", err)
}
c.EP = ep
const tos = 0xC0
if err := c.EP.SetSockOpt(tcpip.IPv4TOSOption(tos)); err != nil {
t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.IPv4TOSOption(tos), err)
}
var v tcpip.IPv4TOSOption
if err := c.EP.GetSockOpt(&v); err != nil {
t.Errorf("GetSockopt failed: %s", err)
}
if want := tcpip.IPv4TOSOption(tos); v != want {
t.Errorf("got GetSockOpt(...) = %#v, want = %#v", v, want)
}
testV4Connect(t, c, checker.TOS(tos, 0))
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Check that data is received.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790), // Acknum is initial sequence number + 1
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
checker.TOS(tos, 0),
)
if p := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(data, p) {
t.Errorf("got data = %x, want = %x", p, data)
}
}
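// TestTrafficClassV6 checks that the IPv6 traffic class socket option can be
// set and read back, and that outgoing packets carry the configured value.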
func TestTrafficClassV6(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateV6Endpoint(false)
const tos = 0xC0
if err := c.EP.SetSockOpt(tcpip.IPv6TrafficClassOption(tos)); err != nil {
t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.IPv6TrafficClassOption(tos), err)
}
var v tcpip.IPv6TrafficClassOption
if err := c.EP.GetSockOpt(&v); err != nil {
t.Fatalf("GetSockopt failed: %s", err)
}
if want := tcpip.IPv6TrafficClassOption(tos); v != want {
t.Errorf("got GetSockOpt(...) = %#v, want = %#v", v, want)
}
// Test the connection request.
testV6Connect(t, c, checker.TOS(tos, 0))
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Check that data is received.
b := c.GetV6Packet()
checker.IPv6(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
checker.TOS(tos, 0),
)
if p := b[header.IPv6MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(data, p) {
t.Errorf("got data = %x, want = %x", p, data)
}
}
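// TestConnectBindToDevice checks that an endpoint bound to a device completes
// the handshake only when the reply arrives via that device; when bound to the
// wrong device the endpoint remains in SYN-SENT.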
func TestConnectBindToDevice(t *testing.T) {
for _, test := range []struct {
name string
device string
want tcp.EndpointState
}{
{"RightDevice", "nic1", tcp.StateEstablished},
{"WrongDevice", "nic2", tcp.StateSynSent},
{"AnyDevice", "", tcp.StateEstablished},
} {
t.Run(test.name, func(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.Create(-1)
bindToDevice := tcpip.BindToDeviceOption(test.device)
c.EP.SetSockOpt(bindToDevice)
// Start connection attempt.
waitEntry, _ := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&waitEntry, waiter.EventOut)
defer c.WQ.EventUnregister(&waitEntry)
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("Unexpected return value from Connect: %v", err)
}
// Receive SYN packet.
b := c.GetPacket()
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
),
)
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateSynSent; got != want {
t.Fatalf("Unexpected endpoint state: want %v, got %v", want, got)
}
tcpHdr := header.TCP(header.IPv4(b).Payload())
c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
iss := seqnum.Value(789)
rcvWnd := seqnum.Size(30000)
c.SendPacket(nil, &context.Headers{
SrcPort: tcpHdr.DestinationPort(),
DstPort: tcpHdr.SourcePort(),
Flags: header.TCPFlagSyn | header.TCPFlagAck,
SeqNum: iss,
AckNum: c.IRS.Add(1),
RcvWnd: rcvWnd,
TCPOpts: nil,
})
c.GetPacket()
if got, want := tcp.EndpointState(c.EP.State()), test.want; got != want {
t.Fatalf("Unexpected endpoint state: want %v, got %v", want, got)
}
})
}
}
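// TestOutOfOrderReceive checks that out-of-order segments are buffered but not
// delivered to the reader until the gap is filled, after which all the data is
// readable and acknowledged.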
func TestOutOfOrderReceive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Send second half of data first, with seqnum 3 ahead of expected.
data := []byte{1, 2, 3, 4, 5, 6}
c.SendPacket(data[3:], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 793,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Check that we get an ACK specifying which seqnum is expected.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Wait 200ms and check that no data has been received.
time.Sleep(200 * time.Millisecond)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Send the first 3 bytes now.
c.SendPacket(data[:3], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Receive data.
read := make([]byte, 0, 6)
for len(read) < len(data) {
v, _, err := c.EP.Read(nil)
if err != nil {
if err == tcpip.ErrWouldBlock {
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
continue
}
t.Fatalf("Read failed: %v", err)
}
read = append(read, v...)
}
// Check that we received the data in proper order.
if !bytes.Equal(data, read) {
t.Fatalf("got data = %v, want = %v", read, data)
}
// Check that the whole data is acknowledged.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
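// TestOutOfOrderFlood checks that out-of-order segments are dropped once the
// out-of-order buffer is full, so that only in-order data ends up being
// acknowledged.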
func TestOutOfOrderFlood(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create a new connection with initial window size of 10.
c.CreateConnected(789, 30000, 10)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Send 100 packets before the actual one that is expected.
data := []byte{1, 2, 3, 4, 5, 6}
for i := 0; i < 100; i++ {
c.SendPacket(data[3:], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 796,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
// Send packet with seqnum 793. It must be discarded because the
// out-of-order buffer was filled by the previous packets.
c.SendPacket(data[3:], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 793,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Now send the expected packet, seqnum 790.
c.SendPacket(data[:3], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Check that only the in-order data (seqnum 790, 3 bytes) is acknowledged.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(793),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
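// TestRstOnCloseWithUnreadData checks that closing an endpoint that still has
// unread data causes a RST (not a FIN) to be sent and moves the endpoint into
// the error state.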
func TestRstOnCloseWithUnreadData(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
data := []byte{1, 2, 3}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(3 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Check that an ACK is sent; this happens regardless of whether the data is read.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Now that we know we have unread data, let's just close the connection
// and verify that netstack sends an RST rather than a FIN.
c.EP.Close()
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst),
// We shouldn't consume a sequence number on RST.
checker.SeqNum(uint32(c.IRS)+1),
))
// The RST puts the endpoint into an error state.
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// This final ACK should be ignored because an ACK on a reset doesn't mean
// anything.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790 + len(data)),
AckNum: c.IRS.Add(seqnum.Size(2)),
RcvWnd: 30000,
})
}
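// TestRstOnCloseWithUnreadDataFinConvertRst checks that shutting down the read
// side while unread data is pending, after a FIN has already been sent,
// converts the teardown into a RST and moves the endpoint into the error state.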
func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
data := []byte{1, 2, 3}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(3 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Check that an ACK is sent; this happens regardless of whether the data is read.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Cause a FIN to be generated.
c.EP.Shutdown(tcpip.ShutdownWrite)
// Make sure we get the FIN, but don't ACK it.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
checker.SeqNum(uint32(c.IRS)+1),
))
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateFinWait1; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Cause a RST to be generated by closing the read end now since we have
// unread data.
c.EP.Shutdown(tcpip.ShutdownRead)
// Make sure we get the RST.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst),
// We shouldn't consume a sequence number on RST.
checker.SeqNum(uint32(c.IRS)+1),
))
// The RST puts the endpoint into an error state.
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// The ACK to the FIN should now be rejected since the connection has been
// closed by a RST.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790 + len(data)),
AckNum: c.IRS.Add(seqnum.Size(2)),
RcvWnd: 30000,
})
}
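// TestShutdownRead checks that reading after ShutdownRead fails with
// ErrClosedForReceive and that the failure is counted in ReadErrors.ReadClosed.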
func TestShutdownRead(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
if err := c.EP.Shutdown(tcpip.ShutdownRead); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
if _, _, err := c.EP.Read(nil); err != tcpip.ErrClosedForReceive {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrClosedForReceive)
}
var want uint64 = 1
if got := c.EP.Stats().(*tcp.Stats).ReadErrors.ReadClosed.Value(); got != want {
t.Fatalf("got EP stats Stats.ReadErrors.ReadClosed got %v want %v", got, want)
}
}
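// TestFullWindowReceive checks that filling the receive buffer causes a zero
// window to be advertised (and counted in ZeroRcvWindowState), and that reading
// the data reopens the window.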
func TestFullWindowReceive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, 10)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Fill up the window.
data := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Check that data is acknowledged, and window goes to zero.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
checker.Window(0),
),
)
// Receive data and check it.
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
if !bytes.Equal(data, v) {
t.Fatalf("got data = %v, want = %v", v, data)
}
var want uint64 = 1
if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.ZeroRcvWindowState.Value(); got != want {
t.Fatalf("got EP stats ReceiveErrors.ZeroRcvWindowState got %v want %v", got, want)
}
// Check that we get an ACK for the newly non-zero window.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
checker.Window(10),
),
)
}
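// TestNoWindowShrinking checks that shrinking the receive buffer does not
// retract an already advertised window: the window only closes as data arrives
// and reopens at the new, smaller size once the data has been read.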
func TestNoWindowShrinking(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Start off with a window size of 10, then shrink it to 5.
c.CreateConnected(789, 30000, 10)
if err := c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 5); err != nil {
t.Fatalf("SetSockOpt failed: %v", err)
}
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Send 3 bytes, check that the peer acknowledges them.
data := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
c.SendPacket(data[:3], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Check that data is acknowledged, and that window doesn't go to zero
// just yet because it was previously set to 10. It must go to 7 now.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(793),
checker.TCPFlags(header.TCPFlagAck),
checker.Window(7),
),
)
// Send 7 more bytes, check that the window fills up.
c.SendPacket(data[3:], &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 793,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
checker.Window(0),
),
)
// Receive data and check it.
read := make([]byte, 0, 10)
for len(read) < len(data) {
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
read = append(read, v...)
}
if !bytes.Equal(data, read) {
t.Fatalf("got data = %v, want = %v", read, data)
}
// Check that we get an ACK for the newly non-zero window, which is the
// new size.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+len(data))),
checker.TCPFlags(header.TCPFlagAck),
checker.Window(5),
),
)
}
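// TestSimpleSend checks that written data is transmitted with the expected
// sequence number, acknowledgement number and payload.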
func TestSimpleSend(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if p := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(data, p) {
t.Fatalf("got data = %v, want = %v", p, data)
}
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1 + seqnum.Size(len(data))),
RcvWnd: 30000,
})
}
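// TestZeroWindowSend checks that data written while the peer advertises a zero
// window is held back and only transmitted once the window opens.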
func TestZeroWindowSend(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 0, -1 /* epRcvBuf */)
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
_, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{})
if err != nil {
t.Fatalf("Write failed: %v", err)
}
// Since the window is currently zero, check that no packet is received.
c.CheckNoPacket("Packet received when window is zero")
// Open up the window. Data should be received now.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Check that data is received.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if p := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(data, p) {
t.Fatalf("got data = %v, want = %v", p, data)
}
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1 + seqnum.Size(len(data))),
RcvWnd: 30000,
})
}
func TestScaledWindowConnect(t *testing.T) {
// This test ensures that window scaling is used when the peer
// does advertise it and connection is established with Connect().
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Set the window size greater than the maximum non-scaled window.
c.CreateConnectedWithRawOptions(789, 30000, 65535*3, []byte{
header.TCPOptionWS, 3, 0, header.TCPOptionNOP,
})
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received, and that advertised window is 0xbfff,
// that is, that it is scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.Window(0xbfff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
}
func TestNonScaledWindowConnect(t *testing.T) {
// This test ensures that window scaling is not used when the peer
// doesn't advertise it and connection is established with Connect().
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Set the window size greater than the maximum non-scaled window.
c.CreateConnected(789, 30000, 65535*3)
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received, and that advertised window is 0xffff,
// that is, that it's not scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.Window(0xffff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
}
func TestScaledWindowAccept(t *testing.T) {
// This test ensures that window scaling is used when the peer
// does advertise it and connection is established with Accept().
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create EP and start listening.
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
defer ep.Close()
// Set the window size greater than the maximum non-scaled window.
if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 65535*3); err != nil {
t.Fatalf("SetSockOpt failed failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Do 3-way handshake.
c.PassiveConnectWithOptions(100, 2, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
c.EP, _, err = ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
c.EP, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received, and that advertised window is 0xbfff,
// that is, that it is scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.Window(0xbfff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
}
func TestNonScaledWindowAccept(t *testing.T) {
// This test ensures that window scaling is not used when the peer
// doesn't advertise it and connection is established with Accept().
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create EP and start listening.
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
defer ep.Close()
// Set the window size greater than the maximum non-scaled window.
if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 65535*3); err != nil {
t.Fatalf("SetSockOpt failed failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Do 3-way handshake w/ window scaling disabled. The SYN-ACK to the SYN
// should not carry the window scaling option.
c.PassiveConnect(100, -1, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
c.EP, _, err = ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
c.EP, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received, and that advertised window is 0xffff,
// that is, that it's not scaled.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.Window(0xffff),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
}
func TestZeroScaledWindowReceive(t *testing.T) {
// This test ensures that the endpoint sends a non-zero window size
// advertisement when the scaled window transitions from 0 to non-zero,
// but the actual window (not scaled) hasn't gotten to zero.
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Set the window size such that a window scale of 4 will be used.
const wnd = 65535 * 10
const ws = uint32(4)
c.CreateConnectedWithRawOptions(789, 30000, wnd, []byte{
header.TCPOptionWS, 3, 0, header.TCPOptionNOP,
})
// Write chunks of 50000 bytes.
remain := wnd
sent := 0
data := make([]byte, 50000)
for remain > len(data) {
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790 + sent),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
sent += len(data)
remain -= len(data)
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+sent)),
checker.Window(uint16(remain>>ws)),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
// Make the window non-zero, but the scaled window zero.
if remain >= 16 {
data = data[:remain-15]
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790 + sent),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
sent += len(data)
remain -= len(data)
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+sent)),
checker.Window(0),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
// Read some data. An ack should be sent in response to that.
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+sent)),
checker.Window(uint16(len(v)>>ws)),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
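// TestSegmentMerging checks that several small writes queued while the
// endpoint is stopped (or corked) are coalesced into a single outgoing segment.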
func TestSegmentMerging(t *testing.T) {
tests := []struct {
name string
stop func(tcpip.Endpoint)
resume func(tcpip.Endpoint)
}{
{
"stop work",
func(ep tcpip.Endpoint) {
ep.(interface{ StopWork() }).StopWork()
},
func(ep tcpip.Endpoint) {
ep.(interface{ ResumeWork() }).ResumeWork()
},
},
{
"cork",
func(ep tcpip.Endpoint) {
ep.SetSockOpt(tcpip.CorkOption(1))
},
func(ep tcpip.Endpoint) {
ep.SetSockOpt(tcpip.CorkOption(0))
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Prevent the endpoint from processing packets.
test.stop(c.EP)
var allData []byte
for i, data := range [][]byte{{1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}, {11}} {
allData = append(allData, data...)
view := buffer.NewViewFromBytes(data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write #%d failed: %v", i+1, err)
}
}
// Let the endpoint process the segments that we just sent.
test.resume(c.EP)
// Check that data is received.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(allData)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if got := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(got, allData) {
t.Fatalf("got data = %v, want = %v", got, allData)
}
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1 + seqnum.Size(len(allData))),
RcvWnd: 30000,
})
})
}
}
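// TestDelay checks that with the delay option enabled the first small write is
// sent immediately while subsequent small writes are held back and coalesced
// until the outstanding data has been acknowledged.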
func TestDelay(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
c.EP.SetSockOptInt(tcpip.DelayOption, 1)
var allData []byte
for i, data := range [][]byte{{0}, {1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}, {11}} {
allData = append(allData, data...)
view := buffer.NewViewFromBytes(data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write #%d failed: %v", i+1, err)
}
}
seq := c.IRS.Add(1)
for _, want := range [][]byte{allData[:1], allData[1:]} {
// Check that data is received.
b := c.GetPacket()
checker.IPv4(t, b,
checker.PayloadLen(len(want)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(seq)),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if got := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(got, want) {
t.Fatalf("got data = %v, want = %v", got, want)
}
seq = seq.Add(seqnum.Size(len(want)))
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seq,
RcvWnd: 30000,
})
}
}
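// TestUndelay checks that clearing the delay option immediately flushes data
// that was being held back.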
func TestUndelay(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
c.EP.SetSockOptInt(tcpip.DelayOption, 1)
allData := [][]byte{{0}, {1, 2, 3}}
for i, data := range allData {
view := buffer.NewViewFromBytes(data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write #%d failed: %v", i+1, err)
}
}
seq := c.IRS.Add(1)
// Check that data is received.
first := c.GetPacket()
checker.IPv4(t, first,
checker.PayloadLen(len(allData[0])+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(seq)),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if got, want := first[header.IPv4MinimumSize+header.TCPMinimumSize:], allData[0]; !bytes.Equal(got, want) {
t.Fatalf("got first packet's data = %v, want = %v", got, want)
}
seq = seq.Add(seqnum.Size(len(allData[0])))
// Check that we don't get the second packet yet.
c.CheckNoPacketTimeout("delayed second packet transmitted", 100*time.Millisecond)
c.EP.SetSockOptInt(tcpip.DelayOption, 0)
// Check that data is received.
second := c.GetPacket()
checker.IPv4(t, second,
checker.PayloadLen(len(allData[1])+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(seq)),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if got, want := second[header.IPv4MinimumSize+header.TCPMinimumSize:], allData[1]; !bytes.Equal(got, want) {
t.Fatalf("got second packet's data = %v, want = %v", got, want)
}
seq = seq.Add(seqnum.Size(len(allData[1])))
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seq,
RcvWnd: 30000,
})
}
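// TestMSSNotDelayed checks that full-sized (MSS) segments are transmitted
// immediately, even when the delay or cork option is enabled.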
func TestMSSNotDelayed(t *testing.T) {
tests := []struct {
name string
fn func(tcpip.Endpoint)
}{
{"no-op", func(tcpip.Endpoint) {}},
{"delay", func(ep tcpip.Endpoint) { ep.SetSockOptInt(tcpip.DelayOption, 1) }},
{"cork", func(ep tcpip.Endpoint) { ep.SetSockOpt(tcpip.CorkOption(1)) }},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
const maxPayload = 100
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnectedWithRawOptions(789, 30000, -1 /* epRcvBuf */, []byte{
header.TCPOptionMSS, 4, byte(maxPayload / 256), byte(maxPayload % 256),
})
test.fn(c.EP)
allData := [][]byte{{0}, make([]byte, maxPayload), make([]byte, maxPayload)}
for i, data := range allData {
view := buffer.NewViewFromBytes(data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write #%d failed: %v", i+1, err)
}
}
seq := c.IRS.Add(1)
for i, data := range allData {
// Check that data is received.
packet := c.GetPacket()
checker.IPv4(t, packet,
checker.PayloadLen(len(data)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(seq)),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
if got, want := packet[header.IPv4MinimumSize+header.TCPMinimumSize:], data; !bytes.Equal(got, want) {
t.Fatalf("got packet #%d's data = %v, want = %v", i+1, got, want)
}
seq = seq.Add(seqnum.Size(len(data)))
}
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seq,
RcvWnd: 30000,
})
})
}
}
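// testBrokenUpWrite writes a payload several times larger than maxPayload and
// verifies that it is broken up into multiple segments, all of which are
// received and acknowledged (echoing timestamps when they are enabled).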
func testBrokenUpWrite(t *testing.T, c *context.Context, maxPayload int) {
payloadMultiplier := 10
dataLen := payloadMultiplier * maxPayload
data := make([]byte, dataLen)
for i := range data {
data[i] = byte(i)
}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that data is received in chunks.
bytesReceived := 0
numPackets := 0
for bytesReceived != dataLen {
b := c.GetPacket()
numPackets++
tcpHdr := header.TCP(header.IPv4(b).Payload())
payloadLen := len(tcpHdr.Payload())
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1+uint32(bytesReceived)),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
pdata := data[bytesReceived : bytesReceived+payloadLen]
if p := tcpHdr.Payload(); !bytes.Equal(pdata, p) {
t.Fatalf("got data = %v, want = %v", p, pdata)
}
bytesReceived += payloadLen
var options []byte
if c.TimeStampEnabled {
// If timestamp option is enabled, echo back the timestamp and increment
// the TSEcr value included in the packet and send that back as the TSVal.
parsedOpts := tcpHdr.ParsedOptions()
tsOpt := [12]byte{header.TCPOptionNOP, header.TCPOptionNOP}
header.EncodeTSOption(parsedOpts.TSEcr+1, parsedOpts.TSVal, tsOpt[2:])
options = tsOpt[:]
}
// Acknowledge the data.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1 + seqnum.Size(bytesReceived)),
RcvWnd: 30000,
TCPOpts: options,
})
}
if numPackets == 1 {
t.Fatalf("expected write to be broken up into multiple packets, but got 1 packet")
}
}
func TestSendGreaterThanMTU(t *testing.T) {
const maxPayload = 100
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
testBrokenUpWrite(t, c, maxPayload)
}
func TestSetTTL(t *testing.T) {
for _, wantTTL := range []uint8{1, 2, 50, 64, 128, 254, 255} {
t.Run(fmt.Sprintf("TTL:%d", wantTTL), func(t *testing.T) {
c := context.New(t, 65535)
defer c.Cleanup()
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
if err := c.EP.SetSockOpt(tcpip.TTLOption(wantTTL)); err != nil {
t.Fatalf("SetSockOpt failed: %v", err)
}
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("Unexpected return value from Connect: %v", err)
}
// Receive SYN packet.
b := c.GetPacket()
checker.IPv4(t, b, checker.TTL(wantTTL))
})
}
}
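// TestActiveSendMSSLessThanMTU checks that segments sent on an actively
// established connection are limited by the MSS advertised by the peer rather
// than by the link MTU.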
func TestActiveSendMSSLessThanMTU(t *testing.T) {
const maxPayload = 100
c := context.New(t, 65535)
defer c.Cleanup()
c.CreateConnectedWithRawOptions(789, 30000, -1 /* epRcvBuf */, []byte{
header.TCPOptionMSS, 4, byte(maxPayload / 256), byte(maxPayload % 256),
})
testBrokenUpWrite(t, c, maxPayload)
}
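// TestPassiveSendMSSLessThanMTU checks that segments sent on a passively
// accepted connection are limited by the MSS negotiated during the handshake
// rather than by the link MTU.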
func TestPassiveSendMSSLessThanMTU(t *testing.T) {
const maxPayload = 100
const mtu = 1200
c := context.New(t, mtu)
defer c.Cleanup()
// Create EP and start listening.
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
defer ep.Close()
// Set the buffer size to a deterministic size so that we can check the
// window scaling option.
const rcvBufferSize = 0x20000
if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, rcvBufferSize); err != nil {
t.Fatalf("SetSockOpt failed failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Do 3-way handshake.
c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
c.EP, _, err = ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
c.EP, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
// Check that data gets properly segmented.
testBrokenUpWrite(t, c, maxPayload)
}
func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) {
const maxPayload = 536
const mtu = 2000
c := context.New(t, mtu)
defer c.Cleanup()
// Set the SynRcvd threshold to zero to force a syn cookie based accept
// to happen.
saved := tcp.SynRcvdCountThreshold
defer func() {
tcp.SynRcvdCountThreshold = saved
}()
tcp.SynRcvdCountThreshold = 0
// Create EP and start listening.
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
defer ep.Close()
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Do 3-way handshake.
c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
c.EP, _, err = ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
c.EP, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
// Check that data gets properly segmented.
testBrokenUpWrite(t, c, maxPayload)
}
func TestForwarderSendMSSLessThanMTU(t *testing.T) {
const maxPayload = 100
const mtu = 1200
c := context.New(t, mtu)
defer c.Cleanup()
s := c.Stack()
ch := make(chan *tcpip.Error, 1)
f := tcp.NewForwarder(s, 65536, 10, func(r *tcp.ForwarderRequest) {
var err *tcpip.Error
c.EP, err = r.CreateEndpoint(&c.WQ)
ch <- err
})
s.SetTransportProtocolHandler(tcp.ProtocolNumber, f.HandlePacket)
// Do 3-way handshake.
c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
// Wait for connection to be available.
select {
case err := <-ch:
if err != nil {
t.Fatalf("Error creating endpoint: %v", err)
}
case <-time.After(2 * time.Second):
t.Fatalf("Timed out waiting for connection")
}
// Check that data gets properly segmented.
testBrokenUpWrite(t, c, maxPayload)
}
func TestSynOptionsOnActiveConnect(t *testing.T) {
const mtu = 1400
c := context.New(t, mtu)
defer c.Cleanup()
// Create TCP endpoint.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
// Set the buffer size to a deterministic size so that we can check the
// window scaling option.
const rcvBufferSize = 0x20000
const wndScale = 2
if err := c.EP.SetSockOptInt(tcpip.ReceiveBufferSizeOption, rcvBufferSize); err != nil {
t.Fatalf("SetSockOpt failed failed: %v", err)
}
// Start connection attempt.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventOut)
defer c.WQ.EventUnregister(&we)
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("got c.EP.Connect(...) = %v, want = %v", err, tcpip.ErrConnectStarted)
}
// Receive SYN packet.
b := c.GetPacket()
mss := uint16(mtu - header.IPv4MinimumSize - header.TCPMinimumSize)
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
checker.TCPSynOptions(header.TCPSynOptions{MSS: mss, WS: wndScale}),
),
)
tcpHdr := header.TCP(header.IPv4(b).Payload())
c.IRS = seqnum.Value(tcpHdr.SequenceNumber())
// Wait for retransmit.
time.Sleep(1 * time.Second)
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagSyn),
checker.SrcPort(tcpHdr.SourcePort()),
checker.SeqNum(tcpHdr.SequenceNumber()),
checker.TCPSynOptions(header.TCPSynOptions{MSS: mss, WS: wndScale}),
),
)
// Send SYN-ACK.
iss := seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: tcpHdr.DestinationPort(),
DstPort: tcpHdr.SourcePort(),
Flags: header.TCPFlagSyn | header.TCPFlagAck,
SeqNum: iss,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
// Receive ACK packet.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(iss)+1),
),
)
// Wait for connection to be established.
select {
case <-ch:
if err := c.EP.GetSockOpt(tcpip.ErrorOption{}); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for connection")
}
}
func TestCloseListener(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create listener.
var wq waiter.Queue
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Close the listener and measure how long it takes.
t0 := time.Now()
ep.Close()
if diff := time.Since(t0); diff > 3*time.Second {
t.Fatalf("Took too long to close: %v", diff)
}
}
func TestReceiveOnResetConnection(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Send RST segment.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagRst,
SeqNum: 790,
RcvWnd: 30000,
})
// Try to read.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
loop:
for {
switch _, _, err := c.EP.Read(nil); err {
case tcpip.ErrWouldBlock:
select {
case <-ch:
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for reset to arrive")
}
case tcpip.ErrConnectionReset:
break loop
default:
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrConnectionReset)
}
}
// Expect the state to be StateError and subsequent Reads to fail with HardError.
if _, _, err := c.EP.Read(nil); err != tcpip.ErrConnectionReset {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrConnectionReset)
}
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want {
t.Fatalf("got EP state = %v, want = %v", got, want)
}
}
func TestSendOnResetConnection(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Send RST segment.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagRst,
SeqNum: 790,
RcvWnd: 30000,
})
// Wait for the RST to be received.
time.Sleep(1 * time.Second)
// Try to write.
view := buffer.NewView(10)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != tcpip.ErrConnectionReset {
t.Fatalf("got c.EP.Write(...) = %v, want = %v", err, tcpip.ErrConnectionReset)
}
}
func TestFinImmediately(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Shutdown immediately, check that we get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
// Ack and send FIN as well.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: c.IRS.Add(2),
RcvWnd: 30000,
})
// Check that the stack acks the FIN.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+2),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
func TestFinRetransmit(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Shutdown immediately, check that we get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
// Don't acknowledge yet. We should get a retransmit of the FIN.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
// Ack and send FIN as well.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: c.IRS.Add(2),
RcvWnd: 30000,
})
// Check that the stack acks the FIN.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+2),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
func TestFinWithNoPendingData(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Write something out, and have it acknowledged.
view := buffer.NewView(10)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
next := uint32(c.IRS) + 1
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
// Shutdown, check that we get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
next++
// Ack and send FIN as well.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
// Check that the stack acks the FIN.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
func TestFinWithPendingDataCwndFull(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Write enough segments to fill the congestion window before ACK'ing
// any of them.
view := buffer.NewView(10)
for i := tcp.InitialCwnd; i > 0; i-- {
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
}
next := uint32(c.IRS) + 1
for i := tcp.InitialCwnd; i > 0; i-- {
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
}
// Shutdown the connection, check that the FIN segment isn't sent
// because the congestion window doesn't allow it. Wait until a
// retransmit is received.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
// Send the ACK that will allow the FIN to be sent as well.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
next++
// Send a FIN that acknowledges everything. Get an ACK back.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
func TestFinWithPendingData(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Write something out, and acknowledge it to get cwnd to 2.
view := buffer.NewView(10)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
next := uint32(c.IRS) + 1
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
// Write new data, but don't acknowledge it.
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
// Shutdown the connection, check that we do get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
next++
// Send a FIN that acknowledges everything. Get an ACK back.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
func TestFinWithPartialAck(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Write something out, and acknowledge it to get cwnd to 2. Also send
// FIN from the test side.
view := buffer.NewView(10)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
next := uint32(c.IRS) + 1
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
// Check that we get an ACK for the fin.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
// Write new data, but don't acknowledge it.
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
next += uint32(len(view))
// Shutdown the connection, check that we do get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(791),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
next++
// Send an ACK for the data, but not for the FIN yet.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 791,
AckNum: seqnum.Value(next - 1),
RcvWnd: 30000,
})
// Check that we don't get a retransmit of the FIN.
c.CheckNoPacketTimeout("FIN retransmitted when data was ack'd", 100*time.Millisecond)
// Ack the FIN.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 791,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
}
func TestUpdateListenBacklog(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create listener.
var wq waiter.Queue
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Update the backlog with another Listen() on the same endpoint.
if err := ep.Listen(20); err != nil {
t.Fatalf("Listen failed to update backlog: %v", err)
}
ep.Close()
}
func scaledSendWindow(t *testing.T, scale uint8) {
// This test ensures that the endpoint is using the right scaling by
// sending a buffer that is larger than the window size, and ensuring
// that the endpoint doesn't send more than allowed.
c := context.New(t, defaultMTU)
defer c.Cleanup()
maxPayload := defaultMTU - header.IPv4MinimumSize - header.TCPMinimumSize
c.CreateConnectedWithRawOptions(789, 0, -1 /* epRcvBuf */, []byte{
header.TCPOptionMSS, 4, byte(maxPayload / 256), byte(maxPayload % 256),
header.TCPOptionWS, 3, scale, header.TCPOptionNOP,
})
// Open up the window with a scaled value.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 1,
})
// Send some data. Check that it's capped by the window size.
view := buffer.NewView(65535)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Check that only data that fits in the scaled window is sent.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen((1<<scale)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
// Reset the connection to free resources.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagRst,
SeqNum: 790,
})
}
func TestScaledSendWindow(t *testing.T) {
for scale := uint8(0); scale <= 14; scale++ {
scaledSendWindow(t, scale)
}
}
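// scaledSendWindow above advertises a raw 16-bit window of 1 together with a
// window scale (WS) option, so the usable window is 1 << scale bytes, which
// is exactly the payload length asserted by the checker. A minimal sketch of
// that computation, for illustration only; effectiveWindow is a hypothetical
// helper and is not used by the tests.
func effectiveWindow(rcvWnd uint16, scale uint8) uint32 {
	// Per RFC 7323, the advertised window is left-shifted by the negotiated
	// scale factor to obtain the window in bytes.
	return uint32(rcvWnd) << scale
}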
func TestReceivedValidSegmentCountIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
stats := c.Stack().Stats()
want := stats.TCP.ValidSegmentsReceived.Value() + 1
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
if got := stats.TCP.ValidSegmentsReceived.Value(); got != want {
t.Errorf("got stats.TCP.ValidSegmentsReceived.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).SegmentsReceived.Value(); got != want {
t.Errorf("got EP stats Stats.SegmentsReceived = %v, want = %v", got, want)
}
// Ensure there were no errors during handshake. If these stats have
// incremented, then the connection should not have been established.
if got := c.EP.Stats().(*tcp.Stats).SendErrors.NoRoute.Value(); got != 0 {
t.Errorf("got EP stats Stats.SendErrors.NoRoute = %v, want = %v", got, 0)
}
if got := c.EP.Stats().(*tcp.Stats).SendErrors.NoLinkAddr.Value(); got != 0 {
t.Errorf("got EP stats Stats.SendErrors.NoLinkAddr = %v, want = %v", got, 0)
}
}
func TestReceivedInvalidSegmentCountIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
stats := c.Stack().Stats()
want := stats.TCP.InvalidSegmentsReceived.Value() + 1
vv := c.BuildSegment(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
tcpbuf := vv.First()[header.IPv4MinimumSize:]
tcpbuf[header.TCPDataOffset] = ((header.TCPMinimumSize - 1) / 4) << 4
c.SendSegment(vv)
if got := stats.TCP.InvalidSegmentsReceived.Value(); got != want {
t.Errorf("got stats.TCP.InvalidSegmentsReceived.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.MalformedPacketsReceived.Value(); got != want {
t.Errorf("got EP Stats.ReceiveErrors.MalformedPacketsReceived stats = %v, want = %v", got, want)
}
}
func TestReceivedIncorrectChecksumIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
stats := c.Stack().Stats()
want := stats.TCP.ChecksumErrors.Value() + 1
vv := c.BuildSegment([]byte{0x1, 0x2, 0x3}, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
tcpbuf := vv.First()[header.IPv4MinimumSize:]
// Overwrite a byte in the payload which should cause checksum
// verification to fail.
tcpbuf[(tcpbuf[header.TCPDataOffset]>>4)*4] = 0x4
c.SendSegment(vv)
if got := stats.TCP.ChecksumErrors.Value(); got != want {
t.Errorf("got stats.TCP.ChecksumErrors.Value() = %d, want = %d", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.ChecksumErrors.Value(); got != want {
t.Errorf("got EP stats Stats.ReceiveErrors.ChecksumErrors = %d, want = %d", got, want)
}
}
func TestReceivedSegmentQueuing(t *testing.T) {
// This test sends 200 segments containing a few bytes each to an
// endpoint and checks that they're all received and acknowledged by
// the endpoint, that is, that none of the segments are dropped by
// internal queues.
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
// Send 200 segments.
data := []byte{1, 2, 3}
for i := 0; i < 200; i++ {
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: seqnum.Value(790 + i*len(data)),
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
}
// Receive ACKs for all segments.
last := seqnum.Value(790 + 200*len(data))
for {
b := c.GetPacket()
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.TCPFlags(header.TCPFlagAck),
),
)
tcpHdr := header.TCP(header.IPv4(b).Payload())
ack := seqnum.Value(tcpHdr.AckNumber())
if ack == last {
break
}
if last.LessThan(ack) {
t.Fatalf("Acknowledge (%v) beyond the expected (%v)", ack, last)
}
}
}
func TestReadAfterClosedState(t *testing.T) {
// This test ensures that calling Read() or Peek() after the endpoint
// has transitioned to closedState still works if there is pending
// data. To transition to stateClosed without calling Close(), we must
// shutdown the send path and the peer must send its own FIN.
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Shutdown immediately for write, check that we get a FIN.
if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil {
t.Fatalf("Shutdown failed: %v", err)
}
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagFin),
),
)
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateFinWait1; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Send some data and acknowledge the FIN.
data := []byte{1, 2, 3}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck | header.TCPFlagFin,
SeqNum: 790,
AckNum: c.IRS.Add(2),
RcvWnd: 30000,
})
// Check that ACK is received.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+2),
checker.AckNum(uint32(791+len(data))),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Give the stack the chance to transition to closed state. Note that since
// both the sender and receiver are now closed, we effectively skip the
// TIME-WAIT state.
time.Sleep(1 * time.Second)
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateClose; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Wait for receive to be notified.
select {
case <-ch:
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for data to arrive")
}
// Check that peek works.
peekBuf := make([]byte, 10)
n, _, err := c.EP.Peek([][]byte{peekBuf})
if err != nil {
t.Fatalf("Peek failed: %v", err)
}
peekBuf = peekBuf[:n]
if !bytes.Equal(data, peekBuf) {
t.Fatalf("got data = %v, want = %v", peekBuf, data)
}
// Receive data.
v, _, err := c.EP.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
if !bytes.Equal(data, v) {
t.Fatalf("got data = %v, want = %v", v, data)
}
// Now that we drained the queue, check that functions fail with the
// right error code.
if _, _, err := c.EP.Read(nil); err != tcpip.ErrClosedForReceive {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrClosedForReceive)
}
if _, _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive {
t.Fatalf("got c.EP.Peek(...) = %v, want = %v", err, tcpip.ErrClosedForReceive)
}
}
func TestReusePort(t *testing.T) {
// This test ensures that ports are immediately available for reuse
// after Close on the endpoints using them returns.
c := context.New(t, defaultMTU)
defer c.Cleanup()
// First case, just an endpoint that was bound.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
c.EP.Close()
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
c.EP.Close()
// Second case, an endpoint that was bound and is connecting.
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("got c.EP.Connect(...) = %v, want = %v", err, tcpip.ErrConnectStarted)
}
c.EP.Close()
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
c.EP.Close()
// Third case, an endpoint that was bound and is listening.
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := c.EP.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
c.EP.Close()
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := c.EP.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
}
func checkRecvBufferSize(t *testing.T, ep tcpip.Endpoint, v int) {
t.Helper()
s, err := ep.GetSockOptInt(tcpip.ReceiveBufferSizeOption)
if err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
if int(s) != v {
t.Fatalf("got receive buffer size = %v, want = %v", s, v)
}
}
func checkSendBufferSize(t *testing.T, ep tcpip.Endpoint, v int) {
t.Helper()
s, err := ep.GetSockOptInt(tcpip.SendBufferSizeOption)
if err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
if int(s) != v {
t.Fatalf("got send buffer size = %v, want = %v", s, v)
}
}
func TestDefaultBufferSizes(t *testing.T) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
})
// Check the default values.
ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
defer func() {
if ep != nil {
ep.Close()
}
}()
checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize)
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default send buffer size.
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
ep.Close()
ep, err = s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2)
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default receive buffer size.
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
ep.Close()
ep, err = s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2)
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3)
}
func TestMinMaxBufferSizes(t *testing.T) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
})
// Check the default values.
ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
defer ep.Close()
// Change the min/max values for send/receive
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
// Set values below the min.
if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 199); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
checkRecvBufferSize(t, ep, 200)
if err := ep.SetSockOptInt(tcpip.SendBufferSizeOption, 299); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
checkSendBufferSize(t, ep, 300)
// Set values above the max.
if err := ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, 1+tcp.DefaultReceiveBufferSize*20); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20)
if err := ep.SetSockOptInt(tcpip.SendBufferSizeOption, 1+tcp.DefaultSendBufferSize*30); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30)
}
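// TestMinMaxBufferSizes relies on SetSockOptInt clamping requested buffer
// sizes to the protocol-level minimum and maximum configured above. A minimal
// sketch of that clamping, under the assumption that it mirrors the stack's
// behavior; clampBufferSize is a hypothetical helper and is not used by the
// tests.
func clampBufferSize(requested, min, max int) int {
	if requested < min {
		return min
	}
	if requested > max {
		return max
	}
	return requested
}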
func TestBindToDeviceOption(t *testing.T) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()}})
ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
if err != nil {
t.Fatalf("NewEndpoint failed; %v", err)
}
defer ep.Close()
if err := s.CreateNamedNIC(321, "my_device", loopback.New()); err != nil {
t.Errorf("CreateNamedNIC failed: %v", err)
}
// Make a nameless NIC.
if err := s.CreateNIC(54321, loopback.New()); err != nil {
t.Errorf("CreateNIC failed: %v", err)
}
// strPtr is used instead of taking the address of string literals, which is
// a compiler error.
strPtr := func(s string) *string {
return &s
}
testActions := []struct {
name string
setBindToDevice *string
setBindToDeviceError *tcpip.Error
getBindToDevice tcpip.BindToDeviceOption
}{
{"GetDefaultValue", nil, nil, ""},
{"BindToNonExistent", strPtr("non_existent_device"), tcpip.ErrUnknownDevice, ""},
{"BindToExistent", strPtr("my_device"), nil, "my_device"},
{"UnbindToDevice", strPtr(""), nil, ""},
}
for _, testAction := range testActions {
t.Run(testAction.name, func(t *testing.T) {
if testAction.setBindToDevice != nil {
bindToDevice := tcpip.BindToDeviceOption(*testAction.setBindToDevice)
if got, want := ep.SetSockOpt(bindToDevice), testAction.setBindToDeviceError; got != want {
t.Errorf("SetSockOpt(%v) got %v, want %v", bindToDevice, got, want)
}
}
bindToDevice := tcpip.BindToDeviceOption("to be modified by GetSockOpt")
if ep.GetSockOpt(&bindToDevice) != nil {
t.Errorf("GetSockOpt got %v, want %v", ep.GetSockOpt(&bindToDevice), nil)
}
if got, want := bindToDevice, testAction.getBindToDevice; got != want {
t.Errorf("bindToDevice got %q, want %q", got, want)
}
})
}
}
func makeStack() (*stack.Stack, *tcpip.Error) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocol{
ipv4.NewProtocol(),
ipv6.NewProtocol(),
},
TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
})
id := loopback.New()
if testing.Verbose() {
id = sniffer.New(id)
}
if err := s.CreateNIC(1, id); err != nil {
return nil, err
}
for _, ct := range []struct {
number tcpip.NetworkProtocolNumber
address tcpip.Address
}{
{ipv4.ProtocolNumber, context.StackAddr},
{ipv6.ProtocolNumber, context.StackV6Addr},
} {
if err := s.AddAddress(1, ct.number, ct.address); err != nil {
return nil, err
}
}
s.SetRouteTable([]tcpip.Route{
{
Destination: header.IPv4EmptySubnet,
NIC: 1,
},
{
Destination: header.IPv6EmptySubnet,
NIC: 1,
},
})
return s, nil
}
func TestSelfConnect(t *testing.T) {
// This test ensures that intentional self-connects work. In particular,
// it checks that if an endpoint binds to, say, 127.0.0.1:1000 and then
// connects to 127.0.0.1:1000, it will be connected to itself and is
// able to send and receive data through the same endpoint.
s, err := makeStack()
if err != nil {
t.Fatal(err)
}
var wq waiter.Queue
ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
defer ep.Close()
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
// Register for notification, then start connection attempt.
waitEntry, notifyCh := waiter.NewChannelEntry(nil)
wq.EventRegister(&waitEntry, waiter.EventOut)
defer wq.EventUnregister(&waitEntry)
if err := ep.Connect(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != tcpip.ErrConnectStarted {
t.Fatalf("got ep.Connect(...) = %v, want = %v", err, tcpip.ErrConnectStarted)
}
<-notifyCh
if err := ep.GetSockOpt(tcpip.ErrorOption{}); err != nil {
t.Fatalf("Connect failed: %v", err)
}
// Write something.
data := []byte{1, 2, 3}
view := buffer.NewView(len(data))
copy(view, data)
if _, _, err := ep.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
// Read back what was written.
wq.EventUnregister(&waitEntry)
wq.EventRegister(&waitEntry, waiter.EventIn)
rd, _, err := ep.Read(nil)
if err != nil {
if err != tcpip.ErrWouldBlock {
t.Fatalf("Read failed: %v", err)
}
<-notifyCh
rd, _, err = ep.Read(nil)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
}
if !bytes.Equal(data, rd) {
t.Fatalf("got data = %v, want = %v", rd, data)
}
}
func TestConnectAvoidsBoundPorts(t *testing.T) {
addressTypes := func(t *testing.T, network string) []string {
switch network {
case "ipv4":
return []string{"v4"}
case "ipv6":
return []string{"v6"}
case "dual":
return []string{"v6", "mapped"}
default:
t.Fatalf("unknown network: '%s'", network)
}
panic("unreachable")
}
address := func(t *testing.T, addressType string, isAny bool) tcpip.Address {
switch addressType {
case "v4":
if isAny {
return ""
}
return context.StackAddr
case "v6":
if isAny {
return ""
}
return context.StackV6Addr
case "mapped":
if isAny {
return context.V4MappedWildcardAddr
}
return context.StackV4MappedAddr
default:
t.Fatalf("unknown address type: '%s'", addressType)
}
panic("unreachable")
}
// This test ensures that Endpoint.Connect doesn't select already-bound ports.
networks := []string{"ipv4", "ipv6", "dual"}
for _, exhaustedNetwork := range networks {
t.Run(fmt.Sprintf("exhaustedNetwork=%s", exhaustedNetwork), func(t *testing.T) {
for _, exhaustedAddressType := range addressTypes(t, exhaustedNetwork) {
t.Run(fmt.Sprintf("exhaustedAddressType=%s", exhaustedAddressType), func(t *testing.T) {
for _, isAny := range []bool{false, true} {
t.Run(fmt.Sprintf("isAny=%t", isAny), func(t *testing.T) {
for _, candidateNetwork := range networks {
t.Run(fmt.Sprintf("candidateNetwork=%s", candidateNetwork), func(t *testing.T) {
for _, candidateAddressType := range addressTypes(t, candidateNetwork) {
t.Run(fmt.Sprintf("candidateAddressType=%s", candidateAddressType), func(t *testing.T) {
s, err := makeStack()
if err != nil {
t.Fatal(err)
}
var wq waiter.Queue
var eps []tcpip.Endpoint
defer func() {
for _, ep := range eps {
ep.Close()
}
}()
makeEP := func(network string) tcpip.Endpoint {
var networkProtocolNumber tcpip.NetworkProtocolNumber
switch network {
case "ipv4":
networkProtocolNumber = ipv4.ProtocolNumber
case "ipv6", "dual":
networkProtocolNumber = ipv6.ProtocolNumber
default:
t.Fatalf("unknown network: '%s'", network)
}
ep, err := s.NewEndpoint(tcp.ProtocolNumber, networkProtocolNumber, &wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
eps = append(eps, ep)
switch network {
case "ipv4":
case "ipv6":
if err := ep.SetSockOpt(tcpip.V6OnlyOption(1)); err != nil {
t.Fatalf("SetSockOpt(V6OnlyOption(1)) failed: %v", err)
}
case "dual":
if err := ep.SetSockOpt(tcpip.V6OnlyOption(0)); err != nil {
t.Fatalf("SetSockOpt(V6OnlyOption(0)) failed: %v", err)
}
default:
t.Fatalf("unknown network: '%s'", network)
}
return ep
}
var v4reserved, v6reserved bool
switch exhaustedAddressType {
case "v4", "mapped":
v4reserved = true
case "v6":
v6reserved = true
// Dual stack sockets bound to v6 any reserve on v4 as
// well.
if isAny {
switch exhaustedNetwork {
case "ipv6":
case "dual":
v4reserved = true
default:
t.Fatalf("unknown address type: '%s'", exhaustedNetwork)
}
}
default:
t.Fatalf("unknown address type: '%s'", exhaustedAddressType)
}
var collides bool
switch candidateAddressType {
case "v4", "mapped":
collides = v4reserved
case "v6":
collides = v6reserved
default:
t.Fatalf("unknown address type: '%s'", candidateAddressType)
}
for i := ports.FirstEphemeral; i <= math.MaxUint16; i++ {
if err := makeEP(exhaustedNetwork).Bind(tcpip.FullAddress{Addr: address(t, exhaustedAddressType, isAny), Port: uint16(i)}); err != nil {
t.Fatalf("Bind(%d) failed: %v", i, err)
}
}
want := tcpip.ErrConnectStarted
if collides {
want = tcpip.ErrNoPortAvailable
}
if err := makeEP(candidateNetwork).Connect(tcpip.FullAddress{Addr: address(t, candidateAddressType, false), Port: 31337}); err != want {
t.Fatalf("got ep.Connect(..) = %v, want = %v", err, want)
}
})
}
})
}
})
}
})
}
})
}
}
func TestPathMTUDiscovery(t *testing.T) {
// This test verifies the stack retransmits packets after it receives an
// ICMP packet indicating that the path MTU has been exceeded.
c := context.New(t, 1500)
defer c.Cleanup()
// Create new connection with MSS of 1460.
const maxPayload = 1500 - header.TCPMinimumSize - header.IPv4MinimumSize
c.CreateConnectedWithRawOptions(789, 30000, -1 /* epRcvBuf */, []byte{
header.TCPOptionMSS, 4, byte(maxPayload / 256), byte(maxPayload % 256),
})
// Send 3200 bytes of data.
const writeSize = 3200
data := buffer.NewView(writeSize)
for i := range data {
data[i] = byte(i)
}
if _, _, err := c.EP.Write(tcpip.SlicePayload(data), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
receivePackets := func(c *context.Context, sizes []int, which int, seqNum uint32) []byte {
var ret []byte
for i, size := range sizes {
p := c.GetPacket()
if i == which {
ret = p
}
checker.IPv4(t, p,
checker.PayloadLen(size+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(seqNum),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
seqNum += uint32(size)
}
return ret
}
// Receive three packets.
sizes := []int{maxPayload, maxPayload, writeSize - 2*maxPayload}
first := receivePackets(c, sizes, 0, uint32(c.IRS)+1)
// Send "packet too big" messages back to netstack.
const newMTU = 1200
const newMaxPayload = newMTU - header.IPv4MinimumSize - header.TCPMinimumSize
mtu := []byte{0, 0, newMTU / 256, newMTU % 256}
c.SendICMPPacket(header.ICMPv4DstUnreachable, header.ICMPv4FragmentationNeeded, mtu, first, newMTU)
// See retransmitted packets. None exceeding the new max.
sizes = []int{newMaxPayload, maxPayload - newMaxPayload, newMaxPayload, maxPayload - newMaxPayload, writeSize - 2*maxPayload}
receivePackets(c, sizes, -1, uint32(c.IRS)+1)
}
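// The mtu slice above follows RFC 1191: an ICMPv4 destination-unreachable /
// fragmentation-needed message carries the next-hop MTU in the last two bytes
// of the otherwise unused 4-byte rest-of-header field, in big-endian order.
// A minimal sketch of that encoding; icmpNextHopMTU is a hypothetical helper
// and is not used by the tests.
func icmpNextHopMTU(mtu uint16) []byte {
	return []byte{0, 0, byte(mtu >> 8), byte(mtu)}
}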
func TestTCPEndpointProbe(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
invoked := make(chan struct{})
c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
// Validate that the endpoint ID is what we expect.
//
// We don't do an extensive validation of every field but a
// basic sanity test.
if got, want := state.ID.LocalAddress, tcpip.Address(context.StackAddr); got != want {
t.Fatalf("got LocalAddress: %q, want: %q", got, want)
}
if got, want := state.ID.LocalPort, c.Port; got != want {
t.Fatalf("got LocalPort: %d, want: %d", got, want)
}
if got, want := state.ID.RemoteAddress, tcpip.Address(context.TestAddr); got != want {
t.Fatalf("got RemoteAddress: %q, want: %q", got, want)
}
if got, want := state.ID.RemotePort, uint16(context.TestPort); got != want {
t.Fatalf("got RemotePort: %d, want: %d", got, want)
}
invoked <- struct{}{}
})
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
data := []byte{1, 2, 3}
c.SendPacket(data, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
select {
case <-invoked:
case <-time.After(100 * time.Millisecond):
t.Fatalf("TCP Probe function was not called")
}
}
func TestStackSetCongestionControl(t *testing.T) {
testCases := []struct {
cc tcpip.CongestionControlOption
err *tcpip.Error
}{
{"reno", nil},
{"cubic", nil},
{"blahblah", tcpip.ErrNoSuchFile},
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("SetTransportProtocolOption(.., %v)", tc.cc), func(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
s := c.Stack()
var oldCC tcpip.CongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &oldCC); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &oldCC, err)
}
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != tc.err {
t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.cc, err, tc.err)
}
var cc tcpip.CongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err)
}
got, want := cc, oldCC
// If SetTransportProtocolOption is expected to succeed
// then the returned value for congestion control should
// match the one specified in the
// SetTransportProtocolOption call above, else it should
// be what it was before the call to
// SetTransportProtocolOption.
if tc.err == nil {
want = tc.cc
}
if got != want {
t.Fatalf("got congestion control: %v, want: %v", got, want)
}
})
}
}
func TestStackAvailableCongestionControl(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
s := c.Stack()
// Query permitted congestion control algorithms.
var aCC tcpip.AvailableCongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &aCC); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &aCC, err)
}
if got, want := aCC, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
}
}
func TestStackSetAvailableCongestionControl(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
s := c.Stack()
// Setting AvailableCongestionControlOption should fail.
aCC := tcpip.AvailableCongestionControlOption("xyz")
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &aCC); err == nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = nil, want non-nil", tcp.ProtocolNumber, &aCC)
}
// Verify that we still get the expected list of congestion control options.
var cc tcpip.AvailableCongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err)
}
if got, want := cc, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
}
}
func TestEndpointSetCongestionControl(t *testing.T) {
testCases := []struct {
cc tcpip.CongestionControlOption
err *tcpip.Error
}{
{"reno", nil},
{"cubic", nil},
{"blahblah", tcpip.ErrNoSuchFile},
}
for _, connected := range []bool{false, true} {
for _, tc := range testCases {
t.Run(fmt.Sprintf("SetSockOpt(.., %v) w/ connected = %v", tc.cc, connected), func(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
// Create TCP endpoint.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
var oldCC tcpip.CongestionControlOption
if err := c.EP.GetSockOpt(&oldCC); err != nil {
t.Fatalf("c.EP.SockOpt(%v) = %v", &oldCC, err)
}
if connected {
c.Connect(789 /* iss */, 32768 /* rcvWnd */, nil)
}
if err := c.EP.SetSockOpt(tc.cc); err != tc.err {
t.Fatalf("c.EP.SetSockOpt(%v) = %v, want %v", tc.cc, err, tc.err)
}
var cc tcpip.CongestionControlOption
if err := c.EP.GetSockOpt(&cc); err != nil {
t.Fatalf("c.EP.SockOpt(%v) = %v", &cc, err)
}
got, want := cc, oldCC
// If SetSockOpt is expected to succeed then the
// returned value for congestion control should match
// the one specified in the SetSockOpt above, else it
// should be what it was before the call to SetSockOpt.
if tc.err == nil {
want = tc.cc
}
if got != want {
t.Fatalf("got congestion control: %v, want: %v", got, want)
}
})
}
}
}
func enableCUBIC(t *testing.T, c *context.Context) {
t.Helper()
opt := tcpip.CongestionControlOption("cubic")
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt); err != nil {
t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, %v = %v", opt, err)
}
}
func TestKeepalive(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
c.EP.SetSockOpt(tcpip.KeepaliveIdleOption(10 * time.Millisecond))
c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(10 * time.Millisecond))
c.EP.SetSockOpt(tcpip.KeepaliveCountOption(5))
c.EP.SetSockOpt(tcpip.KeepaliveEnabledOption(1))
// 5 unacked keepalives are sent. ACK each one, and check that the
// connection stays alive after 5.
for i := 0; i < 10; i++ {
b := c.GetPacket()
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)),
checker.AckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck),
),
)
// Acknowledge the keepalive.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: c.IRS,
RcvWnd: 30000,
})
}
// Check that the connection is still alive.
if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock)
}
// Send some data and wait before ACKing it. Keepalives should be disabled
// during this period.
view := buffer.NewView(3)
if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %v", err)
}
next := uint32(c.IRS) + 1
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
),
)
// Wait for the packet to be retransmitted. Verify that no keepalives
// were sent.
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(len(view)+header.TCPMinimumSize),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(next),
checker.AckNum(790),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagPsh),
),
)
c.CheckNoPacket("Keepalive packet received while unACKed data is pending")
next += uint32(len(view))
// Send ACK. Keepalives should start sending again.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: c.Port,
Flags: header.TCPFlagAck,
SeqNum: 790,
AckNum: seqnum.Value(next),
RcvWnd: 30000,
})
// Now receive 5 keepalives, but don't ACK them. The connection
// should be reset after 5.
for i := 0; i < 5; i++ {
b := c.GetPacket()
checker.IPv4(t, b,
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(next-1)),
checker.AckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck),
),
)
}
// The connection should be terminated after 5 unacked keepalives.
checker.IPv4(t, c.GetPacket(),
checker.TCP(
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(next)),
checker.AckNum(uint32(790)),
checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst),
),
)
if _, _, err := c.EP.Read(nil); err != tcpip.ErrTimeout {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrTimeout)
}
}
func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCookieInUse bool) (irs, iss seqnum.Value) {
// Send a SYN request.
irs = seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: srcPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: irs,
RcvWnd: 30000,
})
// Receive the SYN-ACK reply.
b := c.GetPacket()
tcp := header.TCP(header.IPv4(b).Payload())
iss = seqnum.Value(tcp.SequenceNumber())
tcpCheckers := []checker.TransportChecker{
checker.SrcPort(context.StackPort),
checker.DstPort(srcPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
checker.AckNum(uint32(irs) + 1),
}
if synCookieInUse {
// When cookies are in use, window scaling is disabled.
tcpCheckers = append(tcpCheckers, checker.TCPSynOptions(header.TCPSynOptions{
WS: -1,
MSS: c.MSSWithoutOptions(),
}))
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
// Send ACK.
c.SendPacket(nil, &context.Headers{
SrcPort: srcPort,
DstPort: context.StackPort,
Flags: header.TCPFlagAck,
SeqNum: irs + 1,
AckNum: iss + 1,
RcvWnd: 30000,
})
return irs, iss
}
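// executeHandshake plays the client side of the three-way handshake: the SYN
// and SYN-ACK each consume one sequence number, so the final ACK carries
// SeqNum = irs+1 and AckNum = iss+1, as sent above. A minimal sketch of that
// bookkeeping; handshakeAckNumbers is a hypothetical helper and is not used
// by the tests.
func handshakeAckNumbers(irs, iss seqnum.Value) (seq, ack seqnum.Value) {
	// The SYN occupies sequence number irs, and the peer's SYN occupies iss,
	// so the ACK acknowledges iss+1 and starts sending from irs+1.
	return irs + 1, iss + 1
}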
// TestListenBacklogFull tests that netstack does not complete handshakes if the
// listen backlog for the endpoint is full.
func TestListenBacklogFull(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create TCP endpoint.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
// Bind to wildcard.
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
// Test acceptance.
// Start listening.
listenBacklog := 2
if err := c.EP.Listen(listenBacklog); err != nil {
t.Fatalf("Listen failed: %v", err)
}
for i := 0; i < listenBacklog; i++ {
executeHandshake(t, c, context.TestPort+uint16(i), false /* synCookieInUse */)
}
time.Sleep(50 * time.Millisecond)
// Now send one more SYN. The stack should not respond as the backlog
// is full at this point.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort + 2,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: seqnum.Value(789),
RcvWnd: 30000,
})
c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond)
// Try to accept the connections in the backlog.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
for i := 0; i < listenBacklog; i++ {
_, _, err = c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
_, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
}
// Now verify that there are no more connections that can be accepted.
_, _, err = c.EP.Accept()
if err != tcpip.ErrWouldBlock {
select {
case <-ch:
t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP)
case <-time.After(1 * time.Second):
}
}
// Now a new handshake must succeed.
executeHandshake(t, c, context.TestPort+2, false /* synCookieInUse */)
newEP, _, err := c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
newEP, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
Fixes to TCP listen behavior. Netstack listen loop can get stuck if cookies are in-use and the app is slow to accept incoming connections. Further we continue to complete handshake for a connection even if the backlog is full. This creates a problem when a lots of connections come in rapidly and we end up with lots of completed connections just hanging around to be delivered. These fixes change netstack behaviour to mirror what linux does as described here in the following article http://veithen.io/2014/01/01/how-tcp-backlog-works-in-linux.html Now when cookies are not in-use Netstack will silently drop the ACK to a SYN-ACK and not complete the handshake if the backlog is full. This will result in the connection staying in a half-complete state. Eventually the sender will retransmit the ACK and if backlog has space we will transition to a connected state and deliver the endpoint. Similarly when cookies are in use we do not try and create an endpoint unless there is space in the accept queue to accept the newly created endpoint. If there is no space then we again silently drop the ACK as we can just recreate it when the ACK is retransmitted by the peer. We also now use the backlog to cap the size of the SYN-RCVD queue for a given endpoint. So at any time there can be N connections in the backlog and N in a SYN-RCVD state if the application is not accepting connections. Any new SYNs will be dropped. This CL also fixes another small bug where we mark a new endpoint which has not completed handshake as connected. We should wait till handshake successfully completes before marking it connected. Updates #236 PiperOrigin-RevId: 250717817
2019-05-30 17:47:11 +00:00
// Now verify that the TCP socket is usable and in a connected state.
data := "Don't panic"
newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{})
b := c.GetPacket()
tcp := header.TCP(header.IPv4(b).Payload())
if string(tcp.Payload()) != data {
t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data)
}
}
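// The listen/backlog tests above and below repeat the same accept-then-wait
// pattern. The helper below is an illustrative sketch of how that pattern
// could be factored out; acceptWithTimeout and its exact signature are
// hypothetical and are not referenced by the tests in this file.
func acceptWithTimeout(t *testing.T, ep tcpip.Endpoint, ch chan struct{}, timeout time.Duration) tcpip.Endpoint {
	// Try a non-blocking accept first.
	newEP, _, err := ep.Accept()
	if err == tcpip.ErrWouldBlock {
		// Wait for a connection to be delivered or for the timeout to expire.
		select {
		case <-ch:
			newEP, _, err = ep.Accept()
			if err != nil {
				t.Fatalf("Accept failed: %v", err)
			}
		case <-time.After(timeout):
			t.Fatalf("Timed out waiting for accept")
		}
	}
	return newEP
}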
func TestListenSynRcvdQueueFull(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create TCP endpoint.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
// Bind to wildcard.
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
// Test acceptance.
// Start listening.
listenBacklog := 1
if err := c.EP.Listen(listenBacklog); err != nil {
t.Fatalf("Listen failed: %v", err)
}
// Send two SYNs. The first one should get a SYN-ACK; the
// second one should not get any response and is dropped as
// the synRcvd count will be equal to the backlog.
irs := seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: seqnum.Value(789),
RcvWnd: 30000,
})
// Receive the SYN-ACK reply.
b := c.GetPacket()
tcp := header.TCP(header.IPv4(b).Payload())
iss := seqnum.Value(tcp.SequenceNumber())
tcpCheckers := []checker.TransportChecker{
checker.SrcPort(context.StackPort),
checker.DstPort(context.TestPort),
checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
checker.AckNum(uint32(irs) + 1),
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
// Now send one more SYN. The stack should not respond as the SYN-RCVD
// queue is full at this point.
//
// NOTE: we did not complete the handshake for the previous one so the
// accept backlog should be empty and there should be one connection in
// synRcvd state.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort + 1,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: seqnum.Value(889),
RcvWnd: 30000,
})
c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond)
// Now complete the previous connection and verify that there is a connection
// to accept.
// Send ACK.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagAck,
SeqNum: irs + 1,
AckNum: iss + 1,
RcvWnd: 30000,
})
// Try to accept the connections in the backlog.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
newEP, _, err := c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
newEP, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
// Now verify that the TCP socket is usable and in a connected state.
data := "Don't panic"
newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{})
pkt := c.GetPacket()
tcp = header.TCP(header.IPv4(pkt).Payload())
if string(tcp.Payload()) != data {
t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data)
}
}
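// sendSYN is a hypothetical helper (not used by the tests in this file) that
// captures the minimal packet the tests above send to put a connection into
// SYN-RCVD state on the listener: a bare SYN with an arbitrary sequence
// number and a non-zero receive window.
func sendSYN(c *context.Context, srcPort uint16, seq seqnum.Value) {
	c.SendPacket(nil, &context.Headers{
		SrcPort: srcPort,
		DstPort: context.StackPort,
		Flags:   header.TCPFlagSyn,
		SeqNum:  seq,
		RcvWnd:  30000,
	})
}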
func TestListenBacklogFullSynCookieInUse(t *testing.T) {
saved := tcp.SynRcvdCountThreshold
defer func() {
tcp.SynRcvdCountThreshold = saved
}()
tcp.SynRcvdCountThreshold = 1
c := context.New(t, defaultMTU)
defer c.Cleanup()
// Create TCP endpoint.
var err *tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
// Bind to wildcard.
if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
// Test acceptance.
// Start listening.
listenBacklog := 1
portOffset := uint16(0)
if err := c.EP.Listen(listenBacklog); err != nil {
t.Fatalf("Listen failed: %v", err)
}
executeHandshake(t, c, context.TestPort+portOffset, false)
portOffset++
// Wait for this to be delivered to the accept queue.
time.Sleep(50 * time.Millisecond)
// Send a SYN request.
irs := seqnum.Value(789)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: irs,
RcvWnd: 30000,
})
// The SYN should be dropped as the endpoint's backlog is full.
c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond)
// Verify that there is only one acceptable connection at this point.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
_, _, err = c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
_, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
// Now verify that there are no more connections that can be accepted.
_, _, err = c.EP.Accept()
if err != tcpip.ErrWouldBlock {
select {
case <-ch:
t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP)
case <-time.After(1 * time.Second):
}
}
}
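// forceSynCookies is an illustrative sketch (hypothetical, not used here) of
// the save/lower/restore pattern used by the test above: lowering
// tcp.SynRcvdCountThreshold to 1 makes the stack fall back to SYN cookies
// almost immediately, and the returned function restores the saved value.
// Typical usage would be: defer forceSynCookies()().
func forceSynCookies() (restore func()) {
	saved := tcp.SynRcvdCountThreshold
	tcp.SynRcvdCountThreshold = 1
	return func() { tcp.SynRcvdCountThreshold = saved }
}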
func TestPassiveConnectionAttemptIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
c.EP = ep
if err := ep.Bind(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if got, want := tcp.EndpointState(ep.State()), tcp.StateBound; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
if err := c.EP.Listen(1); err != nil {
t.Fatalf("Listen failed: %v", err)
}
if got, want := tcp.EndpointState(c.EP.State()), tcp.StateListen; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
stats := c.Stack().Stats()
want := stats.TCP.PassiveConnectionOpenings.Value() + 1
srcPort := uint16(context.TestPort)
executeHandshake(t, c, srcPort+1, false)
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
// Verify that there is only one acceptable connection at this point.
_, _, err = c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
_, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
if got := stats.TCP.PassiveConnectionOpenings.Value(); got != want {
t.Errorf("got stats.TCP.PassiveConnectionOpenings.Value() = %v, want = %v", got, want)
}
}
func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
stats := c.Stack().Stats()
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
c.EP = ep
if err := c.EP.Bind(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if err := c.EP.Listen(1); err != nil {
t.Fatalf("Listen failed: %v", err)
}
srcPort := uint16(context.TestPort)
// Now complete one handshake; it will fill up the accept backlog.
executeHandshake(t, c, srcPort, false)
// Give time for the final ACK to be processed; otherwise the next handshake
// could get accepted before the previous one due to goroutine scheduling.
time.Sleep(50 * time.Millisecond)
want := stats.TCP.ListenOverflowSynDrop.Value() + 1
// Now send one more SYN request; this one should get dropped because the
// accept backlog is already full.
c.SendPacket(nil, &context.Headers{
SrcPort: srcPort + 2,
DstPort: context.StackPort,
Flags: header.TCPFlagSyn,
SeqNum: seqnum.Value(789),
RcvWnd: 30000,
})
time.Sleep(50 * time.Millisecond)
if got := stats.TCP.ListenOverflowSynDrop.Value(); got != want {
t.Errorf("got stats.TCP.ListenOverflowSynDrop.Value() = %v, want = %v", got, want)
}
if got := c.EP.Stats().(*tcp.Stats).ReceiveErrors.ListenOverflowSynDrop.Value(); got != want {
t.Errorf("got EP stats Stats.ReceiveErrors.ListenOverflowSynDrop = %v, want = %v", got, want)
}
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.EventIn)
defer c.WQ.EventUnregister(&we)
// Now check that there is one acceptable connection.
_, _, err = c.EP.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
_, _, err = c.EP.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
}
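// listenOverflowSynDrops is a hypothetical helper (not used by the tests in
// this file) showing the two counters the test above compares: the global
// stack-wide SYN drop stat and the per-endpoint receive-error stat, which are
// expected to move together when a SYN is dropped due to a full backlog.
func listenOverflowSynDrops(s *stack.Stack, ep tcpip.Endpoint) (stackDrops, epDrops uint64) {
	return s.Stats().TCP.ListenOverflowSynDrop.Value(),
		ep.Stats().(*tcp.Stats).ReceiveErrors.ListenOverflowSynDrop.Value()
}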
func TestEndpointBindListenAcceptState(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
wq := &waiter.Queue{}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
if err != nil {
t.Fatalf("NewEndpoint failed: %v", err)
}
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %v", err)
}
if got, want := tcp.EndpointState(ep.State()), tcp.StateBound; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Expect InvalidEndpointState errors on a read at this point.
if _, _, err := ep.Read(nil); err != tcpip.ErrInvalidEndpointState {
t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrInvalidEndpointState)
}
if got := ep.Stats().(*tcp.Stats).ReadErrors.InvalidEndpointState.Value(); got != 1 {
t.Fatalf("got EP stats Stats.ReadErrors.InvalidEndpointState = %v, want = %v", got, 1)
}
if err := ep.Listen(10); err != nil {
t.Fatalf("Listen failed: %v", err)
}
if got, want := tcp.EndpointState(ep.State()), tcp.StateListen; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
c.PassiveConnectWithOptions(100, 5, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
wq.EventRegister(&we, waiter.EventIn)
defer wq.EventUnregister(&we)
aep, _, err := ep.Accept()
if err == tcpip.ErrWouldBlock {
// Wait for connection to be established.
select {
case <-ch:
aep, _, err = ep.Accept()
if err != nil {
t.Fatalf("Accept failed: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatalf("Timed out waiting for accept")
}
}
if got, want := tcp.EndpointState(aep.State()), tcp.StateEstablished; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Listening endpoint remains in listen state.
if got, want := tcp.EndpointState(ep.State()), tcp.StateListen; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
ep.Close()
// Give worker goroutines time to receive the close notification.
time.Sleep(1 * time.Second)
if got, want := tcp.EndpointState(ep.State()), tcp.StateClose; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
// Accepted endpoint remains open when the listen endpoint is closed.
if got, want := tcp.EndpointState(aep.State()), tcp.StateEstablished; got != want {
t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
}
}
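// checkEPState is a hypothetical helper (not used by the tests in this file)
// capturing the state assertion repeated in the test above: the endpoint's
// numeric State() is converted to tcp.EndpointState before comparing it with
// the expected state.
func checkEPState(t *testing.T, ep tcpip.Endpoint, want tcp.EndpointState) {
	t.Helper()
	if got := tcp.EndpointState(ep.State()); got != want {
		t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
	}
}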
// This test verifies that the auto tuning does not grow the receive buffer if
// the application is not reading the data actively.
func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
const mtu = 1500
const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
c := context.New(t, mtu)
defer c.Cleanup()
stk := c.Stack()
// Set lower limits for the auto-tuning tests. This is required because the
// test stops the worker, which can cause packets to be dropped since the
// segment queue holding unprocessed packets is limited to 500.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
// Enable auto-tuning.
if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
// Change the expected window scale to match the value needed for the
// maximum buffer size defined above.
c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
// NOTE: The timestamp values in the sent packets are meaningless to the
// peer so we just increment the timestamp value by 1 every batch as we
// are not really using them for anything. Send a single byte to verify
// the advertised window.
tsVal := rawEP.TSVal + 1
// Introduce a 25ms latency by delaying the first byte.
latency := 25 * time.Millisecond
time.Sleep(latency)
rawEP.SendPacketWithTS([]byte{1}, tsVal)
// Verify that the ACK has the expected window.
wantRcvWnd := receiveBufferSize
wantRcvWnd = (wantRcvWnd >> uint32(c.WindowScale))
rawEP.VerifyACKRcvWnd(uint16(wantRcvWnd - 1))
time.Sleep(25 * time.Millisecond)
// Allocate a large enough payload for the test.
b := make([]byte, int(receiveBufferSize)*2)
offset := 0
payloadSize := receiveBufferSize - 1
worker := (c.EP).(interface {
StopWork()
ResumeWork()
})
tsVal++
// Stop the worker goroutine.
worker.StopWork()
start := offset
end := offset + payloadSize
packetsSent := 0
for ; start < end; start += mss {
rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
packetsSent++
}
// Resume the worker so that it only sees the packets once all of them
// are waiting to be read.
worker.ResumeWork()
// Since we read no bytes the window should go to zero until the
// application reads some of the data.
// Discard all intermediate ACKs except the last one.
if packetsSent > 100 {
for i := 0; i < (packetsSent / 100); i++ {
_ = c.GetPacket()
}
}
rawEP.VerifyACKRcvWnd(0)
time.Sleep(25 * time.Millisecond)
// Verify that more data sent while the window is closed is dropped and
// not ACKed.
rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
// Verify that the stack sends us back an ACK with the sequence number
// of the last packet sent indicating it was dropped.
p := c.GetPacket()
checker.IPv4(t, p, checker.TCP(
checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
checker.Window(0),
))
// Now read all the data from the endpoint and verify that advertised
// window increases to the full available buffer size.
for {
_, _, err := c.EP.Read(nil)
if err == tcpip.ErrWouldBlock {
break
}
}
// Verify that we receive a non-zero window update ACK. When running
// under the thread sanitizer this test can end up sending more than one
// ACK, one of which is for the non-zero window update.
p = c.GetPacket()
checker.IPv4(t, p, checker.TCP(
checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
func(t *testing.T, h header.Transport) {
tcp, ok := h.(header.TCP)
if !ok {
return
}
if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) {
t.Errorf("expected a non-zero window: got %d, want <= %d", w, wantRcvWnd)
}
},
))
}
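// drainEndpoint is an illustrative sketch (hypothetical, not used by the
// tests in this file) of the read loop both auto-tuning tests use: read from
// the endpoint until it would block and return the number of bytes copied,
// which is the value the tests pass to ModerateRecvBuf.
func drainEndpoint(ep tcpip.Endpoint) int {
	total := 0
	for {
		v, _, err := ep.Read(nil)
		if err != nil {
			// The tests only expect tcpip.ErrWouldBlock here, which means
			// the receive buffer has been emptied.
			return total
		}
		total += len(v)
	}
}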
// This test verifies that auto tuning grows the receive buffer in proportion
// to the amount of data read by the application in every RTT.
func TestReceiveBufferAutoTuning(t *testing.T) {
const mtu = 1500
const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
c := context.New(t, mtu)
defer c.Cleanup()
// Enable Auto-tuning.
stk := c.Stack()
// Set lower limits for auto-tuning tests. This is required because the
// test stops the worker, which can cause packets to be dropped since the
// segment queue holding unprocessed packets is limited to 500.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
// Enable auto-tuning.
if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
// Change the expected window scale to match the value needed for the
// maximum buffer size used by the stack.
c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
wantRcvWnd := receiveBufferSize
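// scaleRcvWnd converts a receive window in bytes to the scaled 16-bit
// value carried in the TCP header, mirroring the window scale negotiated
// during the handshake above.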
scaleRcvWnd := func(rcvWnd int) uint16 {
return uint16(rcvWnd >> uint16(c.WindowScale))
}
// Allocate a large array to send to the endpoint.
b := make([]byte, receiveBufferSize*48)
// In every iteration we will send double the number of bytes sent in
// the previous iteration and read the same amount from the app. The
// receive window should grow by at least 2x the bytes read by the app
// in every RTT.
offset := 0
payloadSize := receiveBufferSize / 8
worker := (c.EP).(interface {
StopWork()
ResumeWork()
})
tsVal := rawEP.TSVal
// We are going to do our own computation of what the moderated receive
// buffer should be based on sent/copied data per RTT and verify that
// the window advertised by the stack matches our calculation.
prevCopied := 0
done := false
latency := 1 * time.Millisecond
for i := 0; !done; i++ {
tsVal++
// Stop the worker goroutine.
worker.StopWork()
start := offset
end := offset + payloadSize
totalSent := 0
packetsSent := 0
for ; start < end; start += mss {
rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
totalSent += mss
packetsSent++
}
// Resume it so that it only sees the packets once all of them
// are waiting to be read.
worker.ResumeWork()
// Give 1ms for the worker to process the packets.
time.Sleep(1 * time.Millisecond)
// Verify that the advertised window on the ACK is reduced by
// the total bytes sent.
expectedWnd := wantRcvWnd - totalSent
if packetsSent > 100 {
for i := 0; i < (packetsSent / 100); i++ {
_ = c.GetPacket()
}
}
rawEP.VerifyACKRcvWnd(scaleRcvWnd(expectedWnd))
// Now read all the data from the endpoint and invoke the
// moderation API to allow for receive buffer auto-tuning
// to happen before we measure the new window.
totalCopied := 0
for {
b, _, err := c.EP.Read(nil)
if err == tcpip.ErrWouldBlock {
break
}
totalCopied += len(b)
}
// Invoke the moderation API. This is required for auto-tuning
// to happen. This method is normally expected to be invoked
// from a higher layer than tcpip.Endpoint. So we simulate
// copying to user-space by invoking it explicitly here.
c.EP.ModerateRecvBuf(totalCopied)
// Now send a keep-alive packet to trigger an ACK so that we can
// measure the new window.
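// Rewinding NextSeqNum by one and sending a zero-length segment makes
// the segment look like a keep-alive probe (its sequence number is
// below the receiver's rcvNxt), so the stack replies with an immediate
// ACK without any sequence space being consumed.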
rawEP.NextSeqNum--
rawEP.SendPacketWithTS(nil, tsVal)
rawEP.NextSeqNum++
if i == 0 {
// In the first iteration the receiver-based RTT is not yet
// known; as a result the moderation code should not increase
// the advertised window.
rawEP.VerifyACKRcvWnd(scaleRcvWnd(wantRcvWnd))
prevCopied = totalCopied
} else {
rttCopied := totalCopied
if i == 1 {
// The moderation code accumulates copied bytes until an RTT
// is established, so add the bytes copied in the first
// iteration to the total bytes for this RTT.
rttCopied += prevCopied
// Now reset it to the initial value used by the
// auto tuning logic.
prevCopied = tcp.InitialCwnd * mss * 2
}
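// The expected window mirrors the moderation heuristic the stack is
// expected to apply: twice the bytes copied in this RTT plus a
// 16*MSS cushion, grown further in proportion to how much faster
// the application copied data than in the previous RTT, and clamped
// to the maximum receive buffer size.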
newWnd := rttCopied<<1 + 16*mss
grow := (newWnd * (rttCopied - prevCopied)) / prevCopied
newWnd += (grow << 1)
if newWnd > maxReceiveBufferSize {
newWnd = maxReceiveBufferSize
done = true
}
rawEP.VerifyACKRcvWnd(scaleRcvWnd(newWnd))
wantRcvWnd = newWnd
prevCopied = rttCopied
// Increase the latency after the first two iterations to
// establish a low RTT value in the receiver, since it only
// tracks the lowest value. This ensures that when
// ModerateRecvBuf is called the elapsed time is always > rtt.
// Without this the test is flaky due to scheduling/wakeup
// delays.
latency += 50 * time.Millisecond
}
time.Sleep(latency)
offset += payloadSize
payloadSize *= 2
}
}
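// The per-RTT window computation above is dense, so the helper below restates
// it as a standalone sketch for readability. It is not used by the tests and
// is not the stack's actual moderation code; it simply mirrors the arithmetic
// performed in TestReceiveBufferAutoTuning.
func expectedModeratedRcvWnd(rttCopied, prevCopied, mss, maxRcvBuf int) int {
	// Base the window on twice the bytes copied in the last RTT plus a
	// 16*MSS cushion.
	newWnd := rttCopied<<1 + 16*mss
	// Grow it further in proportion to how much faster the application
	// copied data compared to the previous RTT.
	grow := (newWnd * (rttCopied - prevCopied)) / prevCopied
	newWnd += grow << 1
	// Never expect more than the configured maximum receive buffer.
	if newWnd > maxRcvBuf {
		newWnd = maxRcvBuf
	}
	return newWnd
}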
func TestDelayEnabled(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
checkDelayOption(t, c, false, 0) // Delay is disabled by default.
for _, v := range []struct {
delayEnabled tcp.DelayEnabled
wantDelayOption int
}{
{delayEnabled: false, wantDelayOption: 0},
{delayEnabled: true, wantDelayOption: 1},
} {
c := context.New(t, defaultMTU)
defer c.Cleanup()
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, v.delayEnabled); err != nil {
t.Fatalf("SetTransportProtocolOption(tcp, %t) failed: %v", v.delayEnabled, err)
}
checkDelayOption(t, c, v.delayEnabled, v.wantDelayOption)
}
}
func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.DelayEnabled, wantDelayOption int) {
t.Helper()
var gotDelayEnabled tcp.DelayEnabled
if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &gotDelayEnabled); err != nil {
t.Fatalf("TransportProtocolOption(tcp, &gotDelayEnabled) failed: %v", err)
}
if gotDelayEnabled != wantDelayEnabled {
t.Errorf("TransportProtocolOption(tcp, &gotDelayEnabled) got %t, want %t", gotDelayEnabled, wantDelayEnabled)
}
ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, new(waiter.Queue))
if err != nil {
t.Fatalf("NewEndPoint(tcp, ipv4, new(waiter.Queue)) failed: %v", err)
}
gotDelayOption, err := ep.GetSockOptInt(tcpip.DelayOption)
if err != nil {
t.Fatalf("ep.GetSockOptInt(tcpip.DelayOption) failed: %v", err)
}
if gotDelayOption != wantDelayOption {
t.Errorf("ep.GetSockOptInt(tcpip.DelayOption) got: %d, want: %d", gotDelayOption, wantDelayOption)
}
}
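// The auto-tuning tests above derive c.WindowScale from tcp.FindWndScale. The
// test below is a sketch that is not part of the original suite; it assumes
// FindWndScale picks a scale at which the 16-bit window field can advertise
// at least the requested buffer size, and spot-checks that assumption for the
// buffer limit used in the auto-tuning tests.
func TestFindWndScaleCoversAutoTuningBuffer(t *testing.T) {
	// Mirrors maxReceiveBufferSize used in the auto-tuning tests above.
	const maxReceiveBufferSize = (80 << 10) * 10
	ws := tcp.FindWndScale(maxReceiveBufferSize)
	// A window scale of ws lets the 16-bit window field advertise up to
	// 0xffff << ws bytes; that must cover the configured buffer.
	if scaled := 0xffff << uint(ws); scaled < maxReceiveBufferSize {
		t.Errorf("tcp.FindWndScale(%d) = %d, but 0xffff<<%d = %d does not cover the buffer", maxReceiveBufferSize, ws, ws, scaled)
	}
}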