Merge release-20210125.0-24-gff4fc4278 (automated)
This commit is contained in:
commit
95a799b374
|
@ -76,12 +76,16 @@ type TCPCubicState struct {
|
|||
// TCPRACKState is used to hold a copy of the internal RACK state when the
|
||||
// TCPProbeFunc is invoked.
|
||||
type TCPRACKState struct {
|
||||
XmitTime time.Time
|
||||
EndSequence seqnum.Value
|
||||
FACK seqnum.Value
|
||||
RTT time.Duration
|
||||
Reord bool
|
||||
DSACKSeen bool
|
||||
XmitTime time.Time
|
||||
EndSequence seqnum.Value
|
||||
FACK seqnum.Value
|
||||
RTT time.Duration
|
||||
Reord bool
|
||||
DSACKSeen bool
|
||||
ReoWnd time.Duration
|
||||
ReoWndIncr uint8
|
||||
ReoWndPersist int8
|
||||
RTTSeq seqnum.Value
|
||||
}
|
||||
|
||||
// TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
|
||||
|
|
|
@ -3031,12 +3031,16 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
|
|||
|
||||
rc := &e.snd.rc
|
||||
s.Sender.RACKState = stack.TCPRACKState{
|
||||
XmitTime: rc.xmitTime,
|
||||
EndSequence: rc.endSequence,
|
||||
FACK: rc.fack,
|
||||
RTT: rc.rtt,
|
||||
Reord: rc.reorderSeen,
|
||||
DSACKSeen: rc.dsackSeen,
|
||||
XmitTime: rc.xmitTime,
|
||||
EndSequence: rc.endSequence,
|
||||
FACK: rc.fack,
|
||||
RTT: rc.rtt,
|
||||
Reord: rc.reorderSeen,
|
||||
DSACKSeen: rc.dsackSeen,
|
||||
ReoWnd: rc.reoWnd,
|
||||
ReoWndIncr: rc.reoWndIncr,
|
||||
ReoWndPersist: rc.reoWndPersist,
|
||||
RTTSeq: rc.rttSeq,
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
|
|
@ -22,12 +22,21 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
|
||||
)
|
||||
|
||||
// wcDelayedACKTimeout is the recommended maximum delayed ACK timer value as
|
||||
// defined in https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5.
|
||||
// It stands for worst case delayed ACK timer (WCDelAckT). When FlightSize is
|
||||
// 1, PTO is inflated by WCDelAckT time to compensate for a potential long
|
||||
// delayed ACK timer at the receiver.
|
||||
const wcDelayedACKTimeout = 200 * time.Millisecond
|
||||
const (
|
||||
// wcDelayedACKTimeout is the recommended maximum delayed ACK timer
|
||||
// value as defined in the RFC. It stands for worst case delayed ACK
|
||||
// timer (WCDelAckT). When FlightSize is 1, PTO is inflated by
|
||||
// WCDelAckT time to compensate for a potential long delayed ACK timer
|
||||
// at the receiver.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5.
|
||||
wcDelayedACKTimeout = 200 * time.Millisecond
|
||||
|
||||
// tcpRACKRecoveryThreshold is the number of loss recoveries for which
|
||||
// the reorder window is inflated and after that the reorder window is
|
||||
// reset to its initial value of minRTT/4.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2.
|
||||
tcpRACKRecoveryThreshold = 16
|
||||
)
|
||||
|
||||
// RACK is a loss detection algorithm used in TCP to detect packet loss and
|
||||
// reordering using transmission timestamp of the packets instead of packet or
|
||||
|
@ -44,6 +53,11 @@ type rackControl struct {
|
|||
// endSequence is the ending TCP sequence number of rackControl.seg.
|
||||
endSequence seqnum.Value
|
||||
|
||||
// exitedRecovery indicates if the connection is exiting loss recovery.
|
||||
// This flag is set if the sender is leaving the recovery after
|
||||
// receiving an ACK and is reset during updating of reorder window.
|
||||
exitedRecovery bool
|
||||
|
||||
// fack is the highest selectively or cumulatively acknowledged
|
||||
// sequence.
|
||||
fack seqnum.Value
|
||||
|
@ -51,15 +65,30 @@ type rackControl struct {
|
|||
// minRTT is the estimated minimum RTT of the connection.
|
||||
minRTT time.Duration
|
||||
|
||||
// reorderSeen indicates if reordering has been detected on this
|
||||
// connection.
|
||||
reorderSeen bool
|
||||
|
||||
// reoWnd is the reordering window time used for recording packet
|
||||
// transmission times. It is used to defer the moment at which RACK
|
||||
// marks a packet lost.
|
||||
reoWnd time.Duration
|
||||
|
||||
// reoWndIncr is the multiplier applied to adjust reorder window.
|
||||
reoWndIncr uint8
|
||||
|
||||
// reoWndPersist is the number of loss recoveries before resetting
|
||||
// reorder window.
|
||||
reoWndPersist int8
|
||||
|
||||
// rtt is the RTT of the most recently delivered packet on the
|
||||
// connection (either cumulatively acknowledged or selectively
|
||||
// acknowledged) that was not marked invalid as a possible spurious
|
||||
// retransmission.
|
||||
rtt time.Duration
|
||||
|
||||
// reorderSeen indicates if reordering has been detected on this
|
||||
// connection.
|
||||
reorderSeen bool
|
||||
// rttSeq is the SND.NXT when rtt is updated.
|
||||
rttSeq seqnum.Value
|
||||
|
||||
// xmitTime is the latest transmission timestamp of rackControl.seg.
|
||||
xmitTime time.Time `state:".(unixTime)"`
|
||||
|
@ -75,29 +104,36 @@ type rackControl struct {
|
|||
// tlpHighRxt the value of sender.sndNxt at the time of sending
|
||||
// a TLP retransmission.
|
||||
tlpHighRxt seqnum.Value
|
||||
|
||||
// snd is a reference to the sender.
|
||||
snd *sender
|
||||
}
|
||||
|
||||
// init initializes RACK specific fields.
|
||||
func (rc *rackControl) init() {
|
||||
func (rc *rackControl) init(snd *sender, iss seqnum.Value) {
|
||||
rc.fack = iss
|
||||
rc.reoWndIncr = 1
|
||||
rc.snd = snd
|
||||
rc.probeTimer.init(&rc.probeWaker)
|
||||
}
|
||||
|
||||
// update will update the RACK related fields when an ACK has been received.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
|
||||
func (rc *rackControl) update(seg *segment, ackSeg *segment, offset uint32) {
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2
|
||||
func (rc *rackControl) update(seg *segment, ackSeg *segment) {
|
||||
rtt := time.Now().Sub(seg.xmitTime)
|
||||
tsOffset := rc.snd.ep.tsOffset
|
||||
|
||||
// If the ACK is for a retransmitted packet, do not update if it is a
|
||||
// spurious inference which is determined by below checks:
|
||||
// 1. When Timestamping option is available, if the TSVal is less than the
|
||||
// transmit time of the most recent retransmitted packet.
|
||||
// 1. When Timestamping option is available, if the TSVal is less than
|
||||
// the transmit time of the most recent retransmitted packet.
|
||||
// 2. When RTT calculated for the packet is less than the smoothed RTT
|
||||
// for the connection.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
|
||||
// step 2
|
||||
if seg.xmitCount > 1 {
|
||||
if ackSeg.parsedOptions.TS && ackSeg.parsedOptions.TSEcr != 0 {
|
||||
if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, offset) {
|
||||
if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, tsOffset) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
@ -149,9 +185,8 @@ func (rc *rackControl) detectReorder(seg *segment) {
|
|||
}
|
||||
}
|
||||
|
||||
// setDSACKSeen updates rack control if duplicate SACK is seen by the connection.
|
||||
func (rc *rackControl) setDSACKSeen() {
|
||||
rc.dsackSeen = true
|
||||
func (rc *rackControl) setDSACKSeen(dsackSeen bool) {
|
||||
rc.dsackSeen = dsackSeen
|
||||
}
|
||||
|
||||
// shouldSchedulePTO dictates whether we should schedule a PTO or not.
|
||||
|
@ -272,3 +307,82 @@ func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// updateRACKReorderWindow updates the reorder window.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
|
||||
// * Step 4: Update RACK reordering window
|
||||
// To handle the prevalent small degree of reordering, RACK.reo_wnd serves as
|
||||
// an allowance for settling time before marking a packet lost. RACK starts
|
||||
// initially with a conservative window of min_RTT/4. If no reordering has
|
||||
// been observed RACK uses reo_wnd of zero during loss recovery, in order to
|
||||
// retransmit quickly, or when the number of DUPACKs exceeds the classic
|
||||
// DUPACKthreshold.
|
||||
func (rc *rackControl) updateRACKReorderWindow(ackSeg *segment) {
|
||||
dsackSeen := rc.dsackSeen
|
||||
snd := rc.snd
|
||||
|
||||
// React to DSACK once per round trip.
|
||||
// If SND.UNA < RACK.rtt_seq:
|
||||
// RACK.dsack = false
|
||||
if snd.sndUna.LessThan(rc.rttSeq) {
|
||||
dsackSeen = false
|
||||
}
|
||||
|
||||
// If RACK.dsack:
|
||||
// RACK.reo_wnd_incr += 1
|
||||
// RACK.dsack = false
|
||||
// RACK.rtt_seq = SND.NXT
|
||||
// RACK.reo_wnd_persist = 16
|
||||
if dsackSeen {
|
||||
rc.reoWndIncr++
|
||||
dsackSeen = false
|
||||
rc.rttSeq = snd.sndNxt
|
||||
rc.reoWndPersist = tcpRACKRecoveryThreshold
|
||||
} else if rc.exitedRecovery {
|
||||
// Else if exiting loss recovery:
|
||||
// RACK.reo_wnd_persist -= 1
|
||||
// If RACK.reo_wnd_persist <= 0:
|
||||
// RACK.reo_wnd_incr = 1
|
||||
rc.reoWndPersist--
|
||||
if rc.reoWndPersist <= 0 {
|
||||
rc.reoWndIncr = 1
|
||||
}
|
||||
rc.exitedRecovery = false
|
||||
}
|
||||
|
||||
// Reorder window is zero during loss recovery, or when the number of
|
||||
// DUPACKs exceeds the classic DUPACKthreshold.
|
||||
// If RACK.reord is FALSE:
|
||||
// If in loss recovery: (If in fast or timeout recovery)
|
||||
// RACK.reo_wnd = 0
|
||||
// Return
|
||||
// Else if RACK.pkts_sacked >= RACK.dupthresh:
|
||||
// RACK.reo_wnd = 0
|
||||
// return
|
||||
if !rc.reorderSeen {
|
||||
if snd.state == tcpip.RTORecovery || snd.state == tcpip.SACKRecovery {
|
||||
rc.reoWnd = 0
|
||||
return
|
||||
}
|
||||
|
||||
if snd.sackedOut >= nDupAckThreshold {
|
||||
rc.reoWnd = 0
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate reorder window.
|
||||
// RACK.reo_wnd = RACK.min_RTT / 4 * RACK.reo_wnd_incr
|
||||
// RACK.reo_wnd = min(RACK.reo_wnd, SRTT)
|
||||
snd.rtt.Lock()
|
||||
srtt := snd.rtt.srtt
|
||||
snd.rtt.Unlock()
|
||||
rc.reoWnd = time.Duration((int64(rc.minRTT) / 4) * int64(rc.reoWndIncr))
|
||||
if srtt < rc.reoWnd {
|
||||
rc.reoWnd = srtt
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *rackControl) exitRecovery() {
|
||||
rc.exitedRecovery = true
|
||||
}
|
||||
|
|
|
@ -258,14 +258,9 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
|
|||
highRxt: iss,
|
||||
rescueRxt: iss,
|
||||
},
|
||||
rc: rackControl{
|
||||
fack: iss,
|
||||
},
|
||||
gso: ep.gso != nil,
|
||||
}
|
||||
|
||||
s.rc.init()
|
||||
|
||||
if s.gso {
|
||||
s.ep.gso.MSS = uint16(maxPayloadSize)
|
||||
}
|
||||
|
@ -273,6 +268,7 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
|
|||
s.cc = s.initCongestionControl(ep.cc)
|
||||
|
||||
s.lr = s.initLossRecovery()
|
||||
s.rc.init(s, iss)
|
||||
|
||||
// A negative sndWndScale means that no scaling is in use, otherwise we
|
||||
// store the scaling value.
|
||||
|
@ -1058,7 +1054,6 @@ func (s *sender) leaveRecovery() {
|
|||
|
||||
// Deflate cwnd. It had been artificially inflated when new dups arrived.
|
||||
s.sndCwnd = s.sndSsthresh
|
||||
|
||||
s.cc.PostRecovery()
|
||||
}
|
||||
|
||||
|
@ -1195,11 +1190,13 @@ func (s *sender) isDupAck(seg *segment) bool {
|
|||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
|
||||
// steps 2 and 3.
|
||||
func (s *sender) walkSACK(rcvdSeg *segment) {
|
||||
s.rc.setDSACKSeen(false)
|
||||
|
||||
// Look for DSACK block.
|
||||
idx := 0
|
||||
n := len(rcvdSeg.parsedOptions.SACKBlocks)
|
||||
if checkDSACK(rcvdSeg) {
|
||||
s.rc.setDSACKSeen()
|
||||
s.rc.setDSACKSeen(true)
|
||||
idx = 1
|
||||
n--
|
||||
}
|
||||
|
@ -1220,7 +1217,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
|
|||
for _, sb := range sackBlocks {
|
||||
for seg != nil && seg.sequenceNumber.LessThan(sb.End) && seg.xmitCount != 0 {
|
||||
if sb.Start.LessThanEq(seg.sequenceNumber) && !seg.acked {
|
||||
s.rc.update(seg, rcvdSeg, s.ep.tsOffset)
|
||||
s.rc.update(seg, rcvdSeg)
|
||||
s.rc.detectReorder(seg)
|
||||
seg.acked = true
|
||||
s.sackedOut += s.pCount(seg, s.maxPayloadSize)
|
||||
|
@ -1424,7 +1421,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
|
|||
|
||||
// Update the RACK fields if SACK is enabled.
|
||||
if s.ep.sackPermitted && !seg.acked {
|
||||
s.rc.update(seg, rcvdSeg, s.ep.tsOffset)
|
||||
s.rc.update(seg, rcvdSeg)
|
||||
s.rc.detectReorder(seg)
|
||||
}
|
||||
|
||||
|
@ -1454,6 +1451,10 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
|
|||
s.cc.Update(originalOutstanding - s.outstanding)
|
||||
if s.fr.last.LessThan(s.sndUna) {
|
||||
s.state = tcpip.Open
|
||||
// Update RACK when we are exiting fast or RTO
|
||||
// recovery as described in the RFC
|
||||
// draft-ietf-tcpm-rack-08 Section-7.2 Step 4.
|
||||
s.rc.exitRecovery()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1477,6 +1478,12 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
|
|||
}
|
||||
}
|
||||
|
||||
// Update RACK reorder window.
|
||||
// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
|
||||
// * Upon receiving an ACK:
|
||||
// * Step 4: Update RACK reordering window
|
||||
s.rc.updateRACKReorderWindow(rcvdSeg)
|
||||
|
||||
// Now that we've popped all acknowledged data from the retransmit
|
||||
// queue, retransmit if needed.
|
||||
if s.fr.active {
|
||||
|
|
|
@ -393,13 +393,19 @@ func (rc *rackControl) StateFields() []string {
|
|||
return []string{
|
||||
"dsackSeen",
|
||||
"endSequence",
|
||||
"exitedRecovery",
|
||||
"fack",
|
||||
"minRTT",
|
||||
"rtt",
|
||||
"reorderSeen",
|
||||
"reoWnd",
|
||||
"reoWndIncr",
|
||||
"reoWndPersist",
|
||||
"rtt",
|
||||
"rttSeq",
|
||||
"xmitTime",
|
||||
"tlpRxtOut",
|
||||
"tlpHighRxt",
|
||||
"snd",
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -408,27 +414,39 @@ func (rc *rackControl) beforeSave() {}
|
|||
func (rc *rackControl) StateSave(stateSinkObject state.Sink) {
|
||||
rc.beforeSave()
|
||||
var xmitTimeValue unixTime = rc.saveXmitTime()
|
||||
stateSinkObject.SaveValue(6, xmitTimeValue)
|
||||
stateSinkObject.SaveValue(11, xmitTimeValue)
|
||||
stateSinkObject.Save(0, &rc.dsackSeen)
|
||||
stateSinkObject.Save(1, &rc.endSequence)
|
||||
stateSinkObject.Save(2, &rc.fack)
|
||||
stateSinkObject.Save(3, &rc.minRTT)
|
||||
stateSinkObject.Save(4, &rc.rtt)
|
||||
stateSinkObject.Save(2, &rc.exitedRecovery)
|
||||
stateSinkObject.Save(3, &rc.fack)
|
||||
stateSinkObject.Save(4, &rc.minRTT)
|
||||
stateSinkObject.Save(5, &rc.reorderSeen)
|
||||
stateSinkObject.Save(7, &rc.tlpRxtOut)
|
||||
stateSinkObject.Save(8, &rc.tlpHighRxt)
|
||||
stateSinkObject.Save(6, &rc.reoWnd)
|
||||
stateSinkObject.Save(7, &rc.reoWndIncr)
|
||||
stateSinkObject.Save(8, &rc.reoWndPersist)
|
||||
stateSinkObject.Save(9, &rc.rtt)
|
||||
stateSinkObject.Save(10, &rc.rttSeq)
|
||||
stateSinkObject.Save(12, &rc.tlpRxtOut)
|
||||
stateSinkObject.Save(13, &rc.tlpHighRxt)
|
||||
stateSinkObject.Save(14, &rc.snd)
|
||||
}
|
||||
|
||||
func (rc *rackControl) StateLoad(stateSourceObject state.Source) {
|
||||
stateSourceObject.Load(0, &rc.dsackSeen)
|
||||
stateSourceObject.Load(1, &rc.endSequence)
|
||||
stateSourceObject.Load(2, &rc.fack)
|
||||
stateSourceObject.Load(3, &rc.minRTT)
|
||||
stateSourceObject.Load(4, &rc.rtt)
|
||||
stateSourceObject.Load(2, &rc.exitedRecovery)
|
||||
stateSourceObject.Load(3, &rc.fack)
|
||||
stateSourceObject.Load(4, &rc.minRTT)
|
||||
stateSourceObject.Load(5, &rc.reorderSeen)
|
||||
stateSourceObject.Load(7, &rc.tlpRxtOut)
|
||||
stateSourceObject.Load(8, &rc.tlpHighRxt)
|
||||
stateSourceObject.LoadValue(6, new(unixTime), func(y interface{}) { rc.loadXmitTime(y.(unixTime)) })
|
||||
stateSourceObject.Load(6, &rc.reoWnd)
|
||||
stateSourceObject.Load(7, &rc.reoWndIncr)
|
||||
stateSourceObject.Load(8, &rc.reoWndPersist)
|
||||
stateSourceObject.Load(9, &rc.rtt)
|
||||
stateSourceObject.Load(10, &rc.rttSeq)
|
||||
stateSourceObject.Load(12, &rc.tlpRxtOut)
|
||||
stateSourceObject.Load(13, &rc.tlpHighRxt)
|
||||
stateSourceObject.Load(14, &rc.snd)
|
||||
stateSourceObject.LoadValue(11, new(unixTime), func(y interface{}) { rc.loadXmitTime(y.(unixTime)) })
|
||||
stateSourceObject.AfterLoad(rc.afterLoad)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue