593 lines
21 KiB
Go
593 lines
21 KiB
Go
// Copyright 2018 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tcp
|
|
|
|
import (
|
|
"container/heap"
|
|
"math"
|
|
"time"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
|
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
|
|
)
|
|
|
|
// receiver holds the state necessary to receive TCP segments and turn them
// into a stream of bytes.
//
// +stateify savable
type receiver struct {
	// ep is the endpoint on whose behalf segments are received.
	ep *endpoint

	// rcvNxt is the sequence number the receiver expects to consume next. It
	// is advanced by consumeSegment as in-order data (and a FIN) is consumed.
	rcvNxt seqnum.Value

	// rcvAcc is one beyond the last acceptable sequence number. That is,
	// the "largest" sequence value that the receiver has announced to its
	// peer that it's willing to accept. This may be different than
	// rcvNxt + rcvWnd if the receive window is reduced; in that case we
	// have to reduce the window as we receive more data instead of
	// shrinking it.
	rcvAcc seqnum.Value

	// rcvWnd is the non-scaled receive window last advertised to the peer.
	rcvWnd seqnum.Size

	// rcvWUP is the rcvNxt value at the last window update sent.
	rcvWUP seqnum.Value

	// rcvWndScale is the shift applied to rcvWnd to obtain the value that is
	// advertised to the peer (see getSendParams).
	rcvWndScale uint8

	// prevBufUsed is the snapshot of endpoint rcvBufUsed taken when we
	// advertise a receive window.
	prevBufUsed int

	// closed is set once a FIN has been consumed, i.e. no more data will
	// arrive from the peer.
	closed bool

	// pendingRcvdSegments holds segments that were received but could not be
	// consumed yet (out-of-order data). It is bounded by the receive buffer
	// size of the endpoint.
	pendingRcvdSegments segmentHeap
	// pendingBufUsed tracks the total number of bytes (including segment
	// overhead) currently queued in pendingRcvdSegments.
	pendingBufUsed int

	// lastRcvdAckTime is the time when the last ack was received. It is
	// refreshed by handleRcvdSegment for every segment that passes the
	// acceptability checks.
	lastRcvdAckTime time.Time `state:".(unixTime)"`
}
|
|
|
|
func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver {
|
|
return &receiver{
|
|
ep: ep,
|
|
rcvNxt: irs + 1,
|
|
rcvAcc: irs.Add(rcvWnd + 1),
|
|
rcvWnd: rcvWnd,
|
|
rcvWUP: irs + 1,
|
|
rcvWndScale: rcvWndScale,
|
|
lastRcvdAckTime: time.Now(),
|
|
}
|
|
}
|
|
|
|
// acceptable checks if the segment sequence number range is acceptable
|
|
// according to the table on page 26 of RFC 793.
|
|
func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
|
|
// r.rcvWnd could be much larger than the window size we advertised in our
|
|
// outgoing packets, we should use what we have advertised for acceptability
|
|
// test.
|
|
scaledWindowSize := r.rcvWnd >> r.rcvWndScale
|
|
if scaledWindowSize > math.MaxUint16 {
|
|
// This is what we actually put in the Window field.
|
|
scaledWindowSize = math.MaxUint16
|
|
}
|
|
advertisedWindowSize := scaledWindowSize << r.rcvWndScale
|
|
return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvNxt.Add(advertisedWindowSize))
|
|
}
|
|
|
|
// currentWindow returns the available space in the window that was advertised
|
|
// last to our peer.
|
|
func (r *receiver) currentWindow() (curWnd seqnum.Size) {
|
|
endOfWnd := r.rcvWUP.Add(r.rcvWnd)
|
|
if endOfWnd.LessThan(r.rcvNxt) {
|
|
// return 0 if r.rcvNxt is past the end of the previously advertised window.
|
|
// This can happen because we accept a large segment completely even if
|
|
// accepting it causes it to partially exceed the advertised window.
|
|
return 0
|
|
}
|
|
return r.rcvNxt.Size(endOfWnd)
|
|
}
|
|
|
|
// getSendParams returns the parameters needed by the sender when building
|
|
// segments to send.
|
|
func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
|
|
newWnd := r.ep.selectWindow()
|
|
curWnd := r.currentWindow()
|
|
unackLen := int(r.ep.snd.maxSentAck.Size(r.rcvNxt))
|
|
bufUsed := r.ep.receiveBufferUsed()
|
|
|
|
// Grow the right edge of the window only for payloads larger than the
|
|
// the segment overhead OR if the application is actively consuming data.
|
|
//
|
|
// Avoiding growing the right edge otherwise, addresses a situation below:
|
|
// An application has been slow in reading data and we have burst of
|
|
// incoming segments lengths < segment overhead. Here, our available free
|
|
// memory would reduce drastically when compared to the advertised receive
|
|
// window.
|
|
//
|
|
// For example: With incoming 512 bytes segments, segment overhead of
|
|
// 552 bytes (at the time of writing this comment), with receive window
|
|
// starting from 1MB and with rcvAdvWndScale being 1, buffer would reach 0
|
|
// when the curWnd is still 19436 bytes, because for every incoming segment
|
|
// newWnd would reduce by (552+512) >> rcvAdvWndScale (current value 1),
|
|
// while curWnd would reduce by 512 bytes.
|
|
// Such a situation causes us to keep tail dropping the incoming segments
|
|
// and never advertise zero receive window to the peer.
|
|
//
|
|
// Linux does a similar check for minimal sk_buff size (128):
|
|
// https://github.com/torvalds/linux/blob/d5beb3140f91b1c8a3d41b14d729aefa4dcc58bc/net/ipv4/tcp_input.c#L783
|
|
//
|
|
// Also, if the application is reading the data, we keep growing the right
|
|
// edge, as we are still advertising a window that we think can be serviced.
|
|
toGrow := unackLen >= SegSize || bufUsed <= r.prevBufUsed
|
|
|
|
// Update rcvAcc only if new window is > previously advertised window. We
|
|
// should never shrink the acceptable sequence space once it has been
|
|
// advertised the peer. If we shrink the acceptable sequence space then we
|
|
// would end up dropping bytes that might already be in flight.
|
|
// ==================================================== sequence space.
|
|
// ^ ^ ^ ^
|
|
// rcvWUP rcvNxt rcvAcc new rcvAcc
|
|
// <=====curWnd ===>
|
|
// <========= newWnd > curWnd ========= >
|
|
if r.rcvNxt.Add(seqnum.Size(curWnd)).LessThan(r.rcvNxt.Add(seqnum.Size(newWnd))) && toGrow {
|
|
// If the new window moves the right edge, then update rcvAcc.
|
|
r.rcvAcc = r.rcvNxt.Add(seqnum.Size(newWnd))
|
|
} else {
|
|
if newWnd == 0 {
|
|
// newWnd is zero but we can't advertise a zero as it would cause window
|
|
// to shrink so just increment a metric to record this event.
|
|
r.ep.stats.ReceiveErrors.WantZeroRcvWindow.Increment()
|
|
}
|
|
newWnd = curWnd
|
|
}
|
|
// Stash away the non-scaled receive window as we use it for measuring
|
|
// receiver's estimated RTT.
|
|
r.rcvWnd = newWnd
|
|
r.rcvWUP = r.rcvNxt
|
|
r.prevBufUsed = bufUsed
|
|
scaledWnd := r.rcvWnd >> r.rcvWndScale
|
|
if scaledWnd == 0 {
|
|
// Increment a metric if we are advertising an actual zero window.
|
|
r.ep.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
|
|
}
|
|
|
|
// If we started off with a window larger than what can he held in
|
|
// the 16bit window field, we ceil the value to the max value.
|
|
if scaledWnd > math.MaxUint16 {
|
|
scaledWnd = seqnum.Size(math.MaxUint16)
|
|
|
|
// Ensure that the stashed receive window always reflects what
|
|
// is being advertised.
|
|
r.rcvWnd = scaledWnd << r.rcvWndScale
|
|
}
|
|
return r.rcvNxt, scaledWnd
|
|
}
|
|
|
|
// nonZeroWindow is called when the receive window grows from zero to nonzero;
|
|
// in such cases we may need to send an ack to indicate to our peer that it can
|
|
// resume sending data.
|
|
func (r *receiver) nonZeroWindow() {
|
|
// Immediately send an ack.
|
|
r.ep.snd.sendAck()
|
|
}
|
|
|
|
// consumeSegment attempts to consume a segment that was received by r. The
|
|
// segment may have just been received or may have been received earlier but
|
|
// wasn't ready to be consumed then.
|
|
//
|
|
// Returns true if the segment was consumed, false if it cannot be consumed
|
|
// yet because of a missing segment.
|
|
func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool {
|
|
if segLen > 0 {
|
|
// If the segment doesn't include the seqnum we're expecting to
|
|
// consume now, we're missing a segment. We cannot proceed until
|
|
// we receive that segment though.
|
|
if !r.rcvNxt.InWindow(segSeq, segLen) {
|
|
return false
|
|
}
|
|
|
|
// Trim segment to eliminate already acknowledged data.
|
|
if segSeq.LessThan(r.rcvNxt) {
|
|
diff := segSeq.Size(r.rcvNxt)
|
|
segLen -= diff
|
|
segSeq.UpdateForward(diff)
|
|
s.sequenceNumber.UpdateForward(diff)
|
|
s.data.TrimFront(int(diff))
|
|
}
|
|
|
|
// Move segment to ready-to-deliver list. Wakeup any waiters.
|
|
r.ep.readyToRead(s)
|
|
|
|
} else if segSeq != r.rcvNxt {
|
|
return false
|
|
}
|
|
|
|
// Update the segment that we're expecting to consume.
|
|
r.rcvNxt = segSeq.Add(segLen)
|
|
|
|
// In cases of a misbehaving sender which could send more than the
|
|
// advertised window, we could end up in a situation where we get a
|
|
// segment that exceeds the window advertised. Instead of partially
|
|
// accepting the segment and discarding bytes beyond the advertised
|
|
// window, we accept the whole segment and make sure r.rcvAcc is moved
|
|
// forward to match r.rcvNxt to indicate that the window is now closed.
|
|
//
|
|
// In absence of this check the r.acceptable() check fails and accepts
|
|
// segments that should be dropped because rcvWnd is calculated as
|
|
// the size of the interval (rcvNxt, rcvAcc] which becomes extremely
|
|
// large if rcvAcc is ever less than rcvNxt.
|
|
if r.rcvAcc.LessThan(r.rcvNxt) {
|
|
r.rcvAcc = r.rcvNxt
|
|
}
|
|
|
|
// Trim SACK Blocks to remove any SACK information that covers
|
|
// sequence numbers that have been consumed.
|
|
TrimSACKBlockList(&r.ep.sack, r.rcvNxt)
|
|
|
|
// Handle FIN or FIN-ACK.
|
|
if s.flagIsSet(header.TCPFlagFin) {
|
|
r.rcvNxt++
|
|
|
|
// Send ACK immediately.
|
|
r.ep.snd.sendAck()
|
|
|
|
// Tell any readers that no more data will come.
|
|
r.closed = true
|
|
r.ep.readyToRead(nil)
|
|
|
|
// We just received a FIN, our next state depends on whether we sent a
|
|
// FIN already or not.
|
|
switch r.ep.EndpointState() {
|
|
case StateEstablished:
|
|
r.ep.setEndpointState(StateCloseWait)
|
|
case StateFinWait1:
|
|
if s.flagIsSet(header.TCPFlagAck) && s.ackNumber == r.ep.snd.sndNxt {
|
|
// FIN-ACK, transition to TIME-WAIT.
|
|
r.ep.setEndpointState(StateTimeWait)
|
|
} else {
|
|
// Simultaneous close, expecting a final ACK.
|
|
r.ep.setEndpointState(StateClosing)
|
|
}
|
|
case StateFinWait2:
|
|
r.ep.setEndpointState(StateTimeWait)
|
|
}
|
|
|
|
// Flush out any pending segments, except the very first one if
|
|
// it happens to be the one we're handling now because the
|
|
// caller is using it.
|
|
first := 0
|
|
if len(r.pendingRcvdSegments) != 0 && r.pendingRcvdSegments[0] == s {
|
|
first = 1
|
|
}
|
|
|
|
for i := first; i < len(r.pendingRcvdSegments); i++ {
|
|
r.pendingBufUsed -= r.pendingRcvdSegments[i].segMemSize()
|
|
r.pendingRcvdSegments[i].decRef()
|
|
|
|
// Note that slice truncation does not allow garbage collection of
|
|
// truncated items, thus truncated items must be set to nil to avoid
|
|
// memory leaks.
|
|
r.pendingRcvdSegments[i] = nil
|
|
}
|
|
r.pendingRcvdSegments = r.pendingRcvdSegments[:first]
|
|
|
|
return true
|
|
}
|
|
|
|
// Handle ACK (not FIN-ACK, which we handled above) during one of the
|
|
// shutdown states.
|
|
if s.flagIsSet(header.TCPFlagAck) && s.ackNumber == r.ep.snd.sndNxt {
|
|
switch r.ep.EndpointState() {
|
|
case StateFinWait1:
|
|
r.ep.setEndpointState(StateFinWait2)
|
|
// Notify protocol goroutine that we have received an
|
|
// ACK to our FIN so that it can start the FIN_WAIT2
|
|
// timer to abort connection if the other side does
|
|
// not close within 2MSL.
|
|
r.ep.notifyProtocolGoroutine(notifyClose)
|
|
case StateClosing:
|
|
r.ep.setEndpointState(StateTimeWait)
|
|
case StateLastAck:
|
|
r.ep.transitionToStateCloseLocked()
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// updateRTT updates the receiver RTT measurement based on the sequence number
|
|
// of the received segment.
|
|
func (r *receiver) updateRTT() {
|
|
// From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
|
|
//
|
|
// A system that is only transmitting acknowledgements can still
|
|
// estimate the round-trip time by observing the time between when a byte
|
|
// is first acknowledged and the receipt of data that is at least one
|
|
// window beyond the sequence number that was acknowledged.
|
|
r.ep.rcvListMu.Lock()
|
|
if r.ep.rcvAutoParams.rttMeasureTime.IsZero() {
|
|
// New measurement.
|
|
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
|
|
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
|
|
r.ep.rcvListMu.Unlock()
|
|
return
|
|
}
|
|
if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) {
|
|
r.ep.rcvListMu.Unlock()
|
|
return
|
|
}
|
|
rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime)
|
|
// We only store the minimum observed RTT here as this is only used in
|
|
// absence of a SRTT available from either timestamps or a sender
|
|
// measurement of RTT.
|
|
if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt {
|
|
r.ep.rcvAutoParams.rtt = rtt
|
|
}
|
|
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
|
|
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
|
|
r.ep.rcvListMu.Unlock()
|
|
}
|
|
|
|
func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err tcpip.Error) {
|
|
r.ep.rcvListMu.Lock()
|
|
rcvClosed := r.ep.rcvClosed || r.closed
|
|
r.ep.rcvListMu.Unlock()
|
|
|
|
// If we are in one of the shutdown states then we need to do
|
|
// additional checks before we try and process the segment.
|
|
switch state {
|
|
case StateCloseWait, StateClosing, StateLastAck:
|
|
if !s.sequenceNumber.LessThanEq(r.rcvNxt) {
|
|
// Just drop the segment as we have
|
|
// already received a FIN and this
|
|
// segment is after the sequence number
|
|
// for the FIN.
|
|
return true, nil
|
|
}
|
|
fallthrough
|
|
case StateFinWait1, StateFinWait2:
|
|
// If the ACK acks something not yet sent then we send an ACK.
|
|
//
|
|
// RFC793, page 37: If the connection is in a synchronized state,
|
|
// (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK,
|
|
// TIME-WAIT), any unacceptable segment (out of window sequence number
|
|
// or unacceptable acknowledgment number) must elicit only an empty
|
|
// acknowledgment segment containing the current send-sequence number
|
|
// and an acknowledgment indicating the next sequence number expected
|
|
// to be received, and the connection remains in the same state.
|
|
//
|
|
// Just as on Linux, we do not apply this behavior when state is
|
|
// ESTABLISHED.
|
|
// Linux receive processing for all states except ESTABLISHED and
|
|
// TIME_WAIT is here where if the ACK check fails, we attempt to
|
|
// reply back with an ACK with correct seq/ack numbers.
|
|
// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L6186
|
|
// The ESTABLISHED state processing is here where if the ACK check
|
|
// fails, we ignore the packet:
|
|
// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591
|
|
if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
|
|
r.ep.snd.maybeSendOutOfWindowAck(s)
|
|
return true, nil
|
|
}
|
|
|
|
// If we are closed for reads (either due to an
|
|
// incoming FIN or the user calling shutdown(..,
|
|
// SHUT_RD) then any data past the rcvNxt should
|
|
// trigger a RST.
|
|
endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size()))
|
|
if state != StateCloseWait && rcvClosed && r.rcvNxt.LessThan(endDataSeq) {
|
|
return true, &tcpip.ErrConnectionAborted{}
|
|
}
|
|
if state == StateFinWait1 {
|
|
break
|
|
}
|
|
|
|
// If it's a retransmission of an old data segment
|
|
// or a pure ACK then allow it.
|
|
if s.sequenceNumber.Add(s.logicalLen()).LessThanEq(r.rcvNxt) ||
|
|
s.logicalLen() == 0 {
|
|
break
|
|
}
|
|
|
|
// In FIN-WAIT2 if the socket is fully
|
|
// closed(not owned by application on our end
|
|
// then the only acceptable segment is a
|
|
// FIN. Since FIN can technically also carry
|
|
// data we verify that the segment carrying a
|
|
// FIN ends at exactly e.rcvNxt+1.
|
|
//
|
|
// From RFC793 page 25.
|
|
//
|
|
// For sequence number purposes, the SYN is
|
|
// considered to occur before the first actual
|
|
// data octet of the segment in which it occurs,
|
|
// while the FIN is considered to occur after
|
|
// the last actual data octet in a segment in
|
|
// which it occurs.
|
|
if closed && (!s.flagIsSet(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.rcvNxt+1) {
|
|
return true, &tcpip.ErrConnectionAborted{}
|
|
}
|
|
}
|
|
|
|
// We don't care about receive processing anymore if the receive side
|
|
// is closed.
|
|
//
|
|
// NOTE: We still want to permit a FIN as it's possible only our
|
|
// end has closed and the peer is yet to send a FIN. Hence we
|
|
// compare only the payload.
|
|
segEnd := s.sequenceNumber.Add(seqnum.Size(s.data.Size()))
|
|
if rcvClosed && !segEnd.LessThanEq(r.rcvNxt) {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// handleRcvdSegment handles TCP segments directed at the connection managed by
|
|
// r as they arrive. It is called by the protocol main loop.
|
|
func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) {
|
|
state := r.ep.EndpointState()
|
|
closed := r.ep.closed
|
|
|
|
segLen := seqnum.Size(s.data.Size())
|
|
segSeq := s.sequenceNumber
|
|
|
|
// If the sequence number range is outside the acceptable range, just
|
|
// send an ACK and stop further processing of the segment.
|
|
// This is according to RFC 793, page 68.
|
|
if !r.acceptable(segSeq, segLen) {
|
|
r.ep.snd.maybeSendOutOfWindowAck(s)
|
|
return true, nil
|
|
}
|
|
|
|
if state != StateEstablished {
|
|
drop, err := r.handleRcvdSegmentClosing(s, state, closed)
|
|
if drop || err != nil {
|
|
return drop, err
|
|
}
|
|
}
|
|
|
|
// Store the time of the last ack.
|
|
r.lastRcvdAckTime = time.Now()
|
|
|
|
// Defer segment processing if it can't be consumed now.
|
|
if !r.consumeSegment(s, segSeq, segLen) {
|
|
if segLen > 0 || s.flagIsSet(header.TCPFlagFin) {
|
|
// We only store the segment if it's within our buffer size limit.
|
|
//
|
|
// Only use 75% of the receive buffer queue for out-of-order
|
|
// segments. This ensures that we always leave some space for the inorder
|
|
// segments to arrive allowing pending segments to be processed and
|
|
// delivered to the user.
|
|
if r.ep.receiveBufferAvailable() > 0 && r.pendingBufUsed < r.ep.receiveBufferSize()>>2 {
|
|
r.ep.rcvListMu.Lock()
|
|
r.pendingBufUsed += s.segMemSize()
|
|
r.ep.rcvListMu.Unlock()
|
|
s.incRef()
|
|
heap.Push(&r.pendingRcvdSegments, s)
|
|
UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
|
|
}
|
|
|
|
// Immediately send an ack so that the peer knows it may
|
|
// have to retransmit.
|
|
r.ep.snd.sendAck()
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// Since we consumed a segment update the receiver's RTT estimate
|
|
// if required.
|
|
if segLen > 0 {
|
|
r.updateRTT()
|
|
}
|
|
|
|
// By consuming the current segment, we may have filled a gap in the
|
|
// sequence number domain that allows pending segments to be consumed
|
|
// now. So try to do it.
|
|
for !r.closed && r.pendingRcvdSegments.Len() > 0 {
|
|
s := r.pendingRcvdSegments[0]
|
|
segLen := seqnum.Size(s.data.Size())
|
|
segSeq := s.sequenceNumber
|
|
|
|
// Skip segment altogether if it has already been acknowledged.
|
|
if !segSeq.Add(segLen-1).LessThan(r.rcvNxt) &&
|
|
!r.consumeSegment(s, segSeq, segLen) {
|
|
break
|
|
}
|
|
|
|
heap.Pop(&r.pendingRcvdSegments)
|
|
r.ep.rcvListMu.Lock()
|
|
r.pendingBufUsed -= s.segMemSize()
|
|
r.ep.rcvListMu.Unlock()
|
|
s.decRef()
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// handleTimeWaitSegment handles inbound segments received when the endpoint
|
|
// has entered the TIME_WAIT state.
|
|
func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn bool) {
|
|
segSeq := s.sequenceNumber
|
|
segLen := seqnum.Size(s.data.Size())
|
|
|
|
// Just silently drop any RST packets in TIME_WAIT. We do not support
|
|
// TIME_WAIT assasination as a result we confirm w/ fix 1 as described
|
|
// in https://tools.ietf.org/html/rfc1337#section-3.
|
|
//
|
|
// This behavior overrides RFC793 page 70 where we transition to CLOSED
|
|
// on receiving RST, which is also default Linux behavior.
|
|
// On Linux the RST can be ignored by setting sysctl net.ipv4.tcp_rfc1337.
|
|
//
|
|
// As we do not yet support PAWS, we are being conservative in ignoring
|
|
// RSTs by default.
|
|
if s.flagIsSet(header.TCPFlagRst) {
|
|
return false, false
|
|
}
|
|
|
|
// If it's a SYN and the sequence number is higher than any seen before
|
|
// for this connection then try and redirect it to a listening endpoint
|
|
// if available.
|
|
//
|
|
// RFC 1122:
|
|
// "When a connection is [...] on TIME-WAIT state [...]
|
|
// [a TCP] MAY accept a new SYN from the remote TCP to
|
|
// reopen the connection directly, if it:
|
|
|
|
// (1) assigns its initial sequence number for the new
|
|
// connection to be larger than the largest sequence
|
|
// number it used on the previous connection incarnation,
|
|
// and
|
|
|
|
// (2) returns to TIME-WAIT state if the SYN turns out
|
|
// to be an old duplicate".
|
|
if s.flagIsSet(header.TCPFlagSyn) && r.rcvNxt.LessThan(segSeq) {
|
|
|
|
return false, true
|
|
}
|
|
|
|
// Drop the segment if it does not contain an ACK.
|
|
if !s.flagIsSet(header.TCPFlagAck) {
|
|
return false, false
|
|
}
|
|
|
|
// Update Timestamp if required. See RFC7323, section-4.3.
|
|
if r.ep.sendTSOk && s.parsedOptions.TS {
|
|
r.ep.updateRecentTimestamp(s.parsedOptions.TSVal, r.ep.snd.maxSentAck, segSeq)
|
|
}
|
|
|
|
if segSeq.Add(1) == r.rcvNxt && s.flagIsSet(header.TCPFlagFin) {
|
|
// If it's a FIN-ACK then resetTimeWait and send an ACK, as it
|
|
// indicates our final ACK could have been lost.
|
|
r.ep.snd.sendAck()
|
|
return true, false
|
|
}
|
|
|
|
// If the sequence number range is outside the acceptable range or
|
|
// carries data then just send an ACK. This is according to RFC 793,
|
|
// page 37.
|
|
//
|
|
// NOTE: In TIME_WAIT the only acceptable sequence number is rcvNxt.
|
|
if segSeq != r.rcvNxt || segLen != 0 {
|
|
r.ep.snd.sendAck()
|
|
}
|
|
return false, false
|
|
}
|