[rack] TLP: Recovery detection.

This change implements the TLP details enumerated in https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6

Fixes #5131

PiperOrigin-RevId: 351558449
2021-01-13 04:23:30 -08:00 · 2021-01-13 04:23:30 -08:00 · 19ab0f15f3
parent fb95e13df5
commit 19ab0f15f3
2 changed files with 87 additions and 15 deletions
--- a/pkg/tcpip/transport/tcp/rack.go
+++ b/pkg/tcpip/transport/tcp/rack.go
@ -67,6 +67,14 @@ type rackControl struct {
 	// probeTimer and probeWaker are used to schedule PTO for RACK TLP algorithm.
 	probeTimer timer       `state:"nosave"`
 	probeWaker sleep.Waker `state:"nosave"`
+
+	// tlpRxtOut indicates whether there is an unacknowledged
+	// TLP retransmission.
+	tlpRxtOut bool
+
+	// tlpHighRxt is the value of sender.sndNxt at the time of sending
+	// a TLP retransmission.
+	tlpHighRxt seqnum.Value
 }

 // init initializes RACK specific fields.
@ -203,3 +211,40 @@ func (s *sender) probeTimerExpired() *tcpip.Error {
 	//  				Arm RTO timer only.
 	return nil
 }
+
+// detectTLPRecovery detects if recovery was accomplished by the loss probes
+// and clears tlpRxtOut once the TLP episode is over. It does nothing unless SACK is permitted and a TLP retransmission is outstanding.
+// See https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.3.
+func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) {
+	if !(s.ep.sackPermitted && s.rc.tlpRxtOut) {
+		return
+	}
+
+	// Step 1. A duplicate ACK for tlpHighRxt with no SACK block ending above tlpHighRxt completes the episode.
+	if s.isDupAck(rcvdSeg) && ack == s.rc.tlpHighRxt {
+		var sbAboveTLPHighRxt bool
+		for _, sb := range rcvdSeg.parsedOptions.SACKBlocks {
+			if s.rc.tlpHighRxt.LessThan(sb.End) {
+				sbAboveTLPHighRxt = true
+				break
+			}
+		}
+		if !sbAboveTLPHighRxt {
+			// TLP episode is complete.
+			s.rc.tlpRxtOut = false
+		}
+	}
+
+	if s.rc.tlpRxtOut && s.rc.tlpHighRxt.LessThanEq(ack) {
+		// TLP episode is complete: ack is at or beyond tlpHighRxt.
+		s.rc.tlpRxtOut = false
+		if !checkDSACK(rcvdSeg) {
+			// Step 2. No DSACK was reported, so either the original packet or the
+			// retransmission (in the form of a probe) was lost. Invoke a congestion
+			// control response equivalent to fast recovery.
+			s.cc.HandleNDupAcks()
+			s.enterRecovery()
+			s.leaveRecovery()
+		}
+	}
+}
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@ -533,6 +533,10 @@ func (s *sender) retransmitTimerExpired() bool {
 	s.ep.stack.Stats().TCP.Timeouts.Increment()
 	s.ep.stats.SendErrors.Timeouts.Increment()

+	// Set TLPRxtOut to false according to
+	// https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1.
+	s.rc.tlpRxtOut = false
+
 	// Give up if we've waited more than a minute since the last resend or
 	// if a user time out is set and we have exceeded the user specified
 	// timeout since the first retransmission.
@ -1060,6 +1064,9 @@ func (s *sender) enterRecovery() {
 	if s.ep.sackPermitted {
 		s.state = SACKRecovery
 		s.ep.stack.Stats().TCP.SACKRecovery.Increment()
+		// Set TLPRxtOut to false according to
+		// https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1.
+		s.rc.tlpRxtOut = false
 		return
 	}
 	s.state = FastRecovery
@ -1143,19 +1150,11 @@ func (s *sender) SetPipe() {
 // detected. It manages the state related to duplicate acks and determines if
 // a retransmit is needed according to the rules in RFC 6582 (NewReno).
 func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
-	ack := seg.ackNumber
+	// We're not in fast recovery yet.

-	// We're not in fast recovery yet. A segment is considered a duplicate
-	// only if it doesn't carry any data and doesn't update the send window,
-	// because if it does, it wasn't sent in response to an out-of-order
-	// segment. If SACK is enabled then we have an additional check to see
-	// if the segment carries new SACK information. If it does then it is
-	// considered a duplicate ACK as per RFC6675.
-	if ack != s.sndUna || seg.logicalLen() != 0 || s.sndWnd != seg.window || ack == s.sndNxt {
-		if !s.ep.sackPermitted || !seg.hasNewSACKInfo {
-			s.dupAckCount = 0
-			return false
-		}
+	if !s.isDupAck(seg) {
+		s.dupAckCount = 0
+		return false
 	}

 	s.dupAckCount++
@ -1186,6 +1185,31 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
 	return true
 }

+// isDupAck reports whether seg is a duplicate ACK as defined in
+// https://tools.ietf.org/html/rfc5681#section-2. When SACK is permitted, seg must also carry new SACK information.
+func (s *sender) isDupAck(seg *segment) bool {
+	// A TCP that utilizes selective acknowledgments (SACKs) [RFC2018, RFC2883]
+	// can leverage the SACK information to determine when an incoming ACK is a
+	// "duplicate" (e.g., if the ACK contains previously unknown SACK
+	// information). Without new SACK information the ACK is not a duplicate.
+	if s.ep.sackPermitted && !seg.hasNewSACKInfo {
+		return false
+	}
+
+	// (a) The receiver of the ACK has outstanding data.
+	return s.sndUna != s.sndNxt &&
+		// (b) The incoming acknowledgment carries no data.
+		seg.logicalLen() == 0 &&
+		// (c) The SYN and FIN bits are both off.
+		!seg.flagIsSet(header.TCPFlagFin) && !seg.flagIsSet(header.TCPFlagSyn) &&
+		// (d) The ACK number is equal to the greatest acknowledgment received on
+		// the given connection (TCP.UNA from RFC793).
+		seg.ackNumber == s.sndUna &&
+		// (e) The advertised window in the incoming acknowledgment equals the
+		// advertised window in the last incoming acknowledgment.
+		s.sndWnd == seg.window
+}
+
 // Iterate the writeList and update RACK for each segment which is newly acked
 // either cumulatively or selectively. Loop through the segments which are
 // sacked, and update the RACK related variables and check for reordering.
@ -1196,7 +1220,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
 	// Look for DSACK block.
 	idx := 0
 	n := len(rcvdSeg.parsedOptions.SACKBlocks)
-	if s.checkDSACK(rcvdSeg) {
+	if checkDSACK(rcvdSeg) {
 		s.rc.setDSACKSeen()
 		idx = 1
 		n--
@ -1228,8 +1252,8 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
 	}
 }

-// checkDSACK checks if a DSACK is reported and updates it in RACK.
-func (s *sender) checkDSACK(rcvdSeg *segment) bool {
+// checkDSACK checks if a DSACK is reported.
+func checkDSACK(rcvdSeg *segment) bool {
 	n := len(rcvdSeg.parsedOptions.SACKBlocks)
 	if n == 0 {
 		return false
@ -1338,6 +1362,9 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 		fastRetransmit = s.detectLoss(rcvdSeg)
 	}

+	// See if TLP based recovery was successful.
+	s.detectTLPRecovery(ack, rcvdSeg)
+
 	// Stash away the current window size.
 	s.sndWnd = rcvdSeg.window