// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tcp
import (
"fmt"
2018-05-11 23:27:50 +00:00
"sync"
2018-07-10 16:22:37 +00:00
"time"
2018-04-27 17:37:02 +00:00
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/pkg/tcpip"
2019-08-15 18:12:28 +00:00
"gvisor.dev/gvisor/pkg/tcpip/header"
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/pkg/tcpip/stack"
2018-04-27 17:37:02 +00:00
)
2018-05-11 23:27:50 +00:00
func ( e * endpoint ) drainSegmentLocked ( ) {
// Drain only up to once.
if e . drainDone != nil {
return
}
e . drainDone = make ( chan struct { } )
e . undrain = make ( chan struct { } )
e . mu . Unlock ( )
2018-07-10 16:22:37 +00:00
e . notifyProtocolGoroutine ( notifyDrain )
2018-05-11 23:27:50 +00:00
<- e . drainDone
e . mu . Lock ( )
}
2018-04-27 17:37:02 +00:00
// beforeSave is invoked by stateify.
func ( e * endpoint ) beforeSave ( ) {
// Stop incoming packets.
e . segmentQueue . setLimit ( 0 )
2018-05-11 23:27:50 +00:00
e . mu . Lock ( )
defer e . mu . Unlock ( )
2018-04-27 17:37:02 +00:00
switch e . state {
2019-06-06 22:03:44 +00:00
case StateInitial , StateBound :
2019-07-23 19:09:15 +00:00
// TODO(b/138137272): this enumeration duplicates
// EndpointState.connected. remove it.
2019-06-06 22:03:44 +00:00
case StateEstablished , StateSynSent , StateSynRecv , StateFinWait1 , StateFinWait2 , StateTimeWait , StateCloseWait , StateLastAck , StateClosing :
2018-07-10 20:53:39 +00:00
if e . route . Capabilities ( ) & stack . CapabilitySaveRestore == 0 {
2018-07-30 22:42:01 +00:00
if e . route . Capabilities ( ) & stack . CapabilityDisconnectOk == 0 {
2019-10-10 00:54:51 +00:00
panic ( tcpip . ErrSaveRejection { fmt . Errorf ( "endpoint cannot be saved in connected state: local %v:%d, remote %v:%d" , e . ID . LocalAddress , e . ID . LocalPort , e . ID . RemoteAddress , e . ID . RemotePort ) } )
2018-07-30 22:42:01 +00:00
}
e . resetConnectionLocked ( tcpip . ErrConnectionAborted )
e . mu . Unlock ( )
e . Close ( )
e . mu . Lock ( )
2018-07-10 20:53:39 +00:00
}
if ! e . workerRunning {
2018-07-30 22:42:01 +00:00
// The endpoint must be in acceptedChan or has been just
// disconnected and closed.
2018-07-10 16:22:37 +00:00
break
2018-04-27 17:37:02 +00:00
}
2018-07-10 20:53:39 +00:00
fallthrough
2019-06-06 22:03:44 +00:00
case StateListen , StateConnecting :
2018-05-11 23:27:50 +00:00
e . drainSegmentLocked ( )
2019-06-06 22:03:44 +00:00
if e . state != StateClose && e . state != StateError {
2018-07-10 16:22:37 +00:00
if ! e . workerRunning {
panic ( "endpoint has no worker running in listen, connecting, or connected state" )
}
2018-05-11 23:27:50 +00:00
break
}
fallthrough
2019-06-06 22:03:44 +00:00
case StateError , StateClose :
2019-11-07 17:45:26 +00:00
for ( e . state == StateError || e . state == StateClose ) && e . workerRunning {
2018-07-10 16:22:37 +00:00
e . mu . Unlock ( )
time . Sleep ( 100 * time . Millisecond )
e . mu . Lock ( )
}
2018-06-05 22:43:38 +00:00
if e . workerRunning {
2018-07-10 16:22:37 +00:00
panic ( "endpoint still has worker running in closed or error state" )
2018-06-05 22:43:38 +00:00
}
2018-04-27 17:37:02 +00:00
default :
panic ( fmt . Sprintf ( "endpoint in unknown state %v" , e . state ) )
}
2018-07-10 16:22:37 +00:00
if e . waiterQueue != nil && ! e . waiterQueue . IsEmpty ( ) {
panic ( "endpoint still has waiters upon save" )
}
2019-06-06 22:03:44 +00:00
if e . state != StateClose && ! ( ( e . state == StateBound || e . state == StateListen ) == e . isPortReserved ) {
2018-12-05 23:01:41 +00:00
panic ( "endpoints which are not in the closed state must have a reserved port IFF they are in bound or listen state" )
2018-07-10 16:22:37 +00:00
}
2018-07-12 20:48:18 +00:00
}
2018-07-10 16:22:37 +00:00
2018-07-12 20:48:18 +00:00
// saveAcceptedChan is invoked by stateify.
func ( e * endpoint ) saveAcceptedChan ( ) [ ] * endpoint {
if e . acceptedChan == nil {
return nil
}
acceptedEndpoints := make ( [ ] * endpoint , len ( e . acceptedChan ) , cap ( e . acceptedChan ) )
2018-12-05 23:01:41 +00:00
for i := 0 ; i < len ( acceptedEndpoints ) ; i ++ {
select {
case ep := <- e . acceptedChan :
acceptedEndpoints [ i ] = ep
default :
panic ( "endpoint acceptedChan buffer got consumed by background context" )
}
2018-07-12 20:48:18 +00:00
}
2018-12-05 23:01:41 +00:00
for i := 0 ; i < len ( acceptedEndpoints ) ; i ++ {
select {
case e . acceptedChan <- acceptedEndpoints [ i ] :
default :
panic ( "endpoint acceptedChan buffer got populated by background context" )
}
2018-07-12 20:48:18 +00:00
}
return acceptedEndpoints
}
// loadAcceptedChan is invoked by stateify.
func ( e * endpoint ) loadAcceptedChan ( acceptedEndpoints [ ] * endpoint ) {
if cap ( acceptedEndpoints ) > 0 {
e . acceptedChan = make ( chan * endpoint , cap ( acceptedEndpoints ) )
for _ , ep := range acceptedEndpoints {
e . acceptedChan <- ep
2018-07-10 16:22:37 +00:00
}
}
}
// saveState is invoked by stateify.
2019-06-06 22:03:44 +00:00
func ( e * endpoint ) saveState ( ) EndpointState {
2018-07-10 16:22:37 +00:00
return e . state
}
// Endpoint loading must be done in the following ordering by their state, to
// avoid dangling connecting w/o listening peer, and to avoid conflicts in port
// reservation.
var (
	// connectedLoading counts connected endpoints that are still restoring.
	connectedLoading sync.WaitGroup

	// listenLoading counts listening endpoints that are still restoring;
	// they wait for connectedLoading first.
	listenLoading sync.WaitGroup

	// connectingLoading counts connecting endpoints that are still
	// restoring; they wait for connectedLoading and listenLoading first.
	connectingLoading sync.WaitGroup

	// Bound endpoint loading happens last.
)
// loadState is invoked by stateify.
2019-06-06 22:03:44 +00:00
func ( e * endpoint ) loadState ( state EndpointState ) {
2018-07-10 16:22:37 +00:00
// This is to ensure that the loading wait groups include all applicable
// endpoints before any asynchronous calls to the Wait() methods.
2019-07-23 19:09:15 +00:00
if state . connected ( ) {
2018-07-10 16:22:37 +00:00
connectedLoading . Add ( 1 )
2019-07-23 19:09:15 +00:00
}
switch state {
2019-06-06 22:03:44 +00:00
case StateListen :
2018-07-10 16:22:37 +00:00
listenLoading . Add ( 1 )
2019-06-06 22:03:44 +00:00
case StateConnecting , StateSynSent , StateSynRecv :
2018-07-10 16:22:37 +00:00
connectingLoading . Add ( 1 )
}
e . state = state
2018-04-27 17:37:02 +00:00
}
// afterLoad is invoked by stateify.
func ( e * endpoint ) afterLoad ( ) {
2019-11-07 17:45:26 +00:00
// Freeze segment queue before registering to prevent any segments
// from being delivered while it is being restored.
e . origEndpointState = e . state
// Restore the endpoint to InitialState as it will be moved to
// its origEndpointState during Resume.
e . state = StateInitial
2019-08-08 19:32:00 +00:00
stack . StackFromEnv . RegisterRestoredEndpoint ( e )
2018-04-27 17:37:02 +00:00
}
2019-08-15 18:12:28 +00:00
// Resume implements tcpip.ResumableEndpoint.Resume.
func ( e * endpoint ) Resume ( s * stack . Stack ) {
e . stack = s
e . segmentQueue . setLimit ( MaxUnprocessedSegments )
e . workMu . Init ( )
2019-11-07 17:45:26 +00:00
state := e . origEndpointState
2019-08-15 18:12:28 +00:00
switch state {
case StateInitial , StateBound , StateListen , StateConnecting , StateEstablished :
var ss SendBufferSizeOption
if err := e . stack . TransportProtocolOption ( ProtocolNumber , & ss ) ; err == nil {
if e . sndBufSize < ss . Min || e . sndBufSize > ss . Max {
panic ( fmt . Sprintf ( "endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]" , e . sndBufSize , ss . Min , ss . Max ) )
}
if e . rcvBufSize < ss . Min || e . rcvBufSize > ss . Max {
panic ( fmt . Sprintf ( "endpoint.rcvBufSize %d is outside the min and max allowed [%d, %d]" , e . rcvBufSize , ss . Min , ss . Max ) )
}
}
}
bind := func ( ) {
2019-10-10 00:54:51 +00:00
if len ( e . BindAddr ) == 0 {
e . BindAddr = e . ID . LocalAddress
2019-08-15 18:12:28 +00:00
}
2019-10-29 23:13:43 +00:00
addr := e . BindAddr
port := e . ID . LocalPort
if err := e . Bind ( tcpip . FullAddress { Addr : addr , Port : port } ) ; err != nil {
panic ( fmt . Sprintf ( "endpoint binding [%v]:%d failed: %v" , addr , port , err ) )
2019-08-15 18:12:28 +00:00
}
}
switch state {
case StateEstablished , StateFinWait1 , StateFinWait2 , StateTimeWait , StateCloseWait , StateLastAck , StateClosing :
bind ( )
if len ( e . connectingAddress ) == 0 {
2019-10-10 00:54:51 +00:00
e . connectingAddress = e . ID . RemoteAddress
2019-08-15 18:12:28 +00:00
// This endpoint is accepted by netstack but not yet by
// the app. If the endpoint is IPv6 but the remote
// address is IPv4, we need to connect as IPv6 so that
// dual-stack mode can be properly activated.
2019-10-10 00:54:51 +00:00
if e . NetProto == header . IPv6ProtocolNumber && len ( e . ID . RemoteAddress ) != header . IPv6AddressSize {
e . connectingAddress = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + e . ID . RemoteAddress
2019-08-15 18:12:28 +00:00
}
}
// Reset the scoreboard to reinitialize the sack information as
// we do not restore SACK information.
e . scoreboard . Reset ( )
2019-10-10 00:54:51 +00:00
if err := e . connect ( tcpip . FullAddress { NIC : e . boundNICID , Addr : e . connectingAddress , Port : e . ID . RemotePort } , false , e . workerRunning ) ; err != tcpip . ErrConnectStarted {
2019-08-15 18:12:28 +00:00
panic ( "endpoint connecting failed: " + err . String ( ) )
}
2019-11-07 17:45:26 +00:00
e . mu . Lock ( )
e . state = e . origEndpointState
closed := e . closed
e . mu . Unlock ( )
e . notifyProtocolGoroutine ( notifyTickleWorker )
if state == StateFinWait2 && closed {
// If the endpoint has been closed then make sure we notify so
// that the FIN_WAIT2 timer is started after a restore.
e . notifyProtocolGoroutine ( notifyClose )
}
2019-08-15 18:12:28 +00:00
connectedLoading . Done ( )
case StateListen :
tcpip . AsyncLoading . Add ( 1 )
go func ( ) {
connectedLoading . Wait ( )
bind ( )
backlog := cap ( e . acceptedChan )
if err := e . Listen ( backlog ) ; err != nil {
panic ( "endpoint listening failed: " + err . String ( ) )
}
listenLoading . Done ( )
tcpip . AsyncLoading . Done ( )
} ( )
case StateConnecting , StateSynSent , StateSynRecv :
tcpip . AsyncLoading . Add ( 1 )
go func ( ) {
connectedLoading . Wait ( )
listenLoading . Wait ( )
bind ( )
2019-10-10 00:54:51 +00:00
if err := e . Connect ( tcpip . FullAddress { NIC : e . boundNICID , Addr : e . connectingAddress , Port : e . ID . RemotePort } ) ; err != tcpip . ErrConnectStarted {
2019-08-15 18:12:28 +00:00
panic ( "endpoint connecting failed: " + err . String ( ) )
}
connectingLoading . Done ( )
tcpip . AsyncLoading . Done ( )
} ( )
case StateBound :
tcpip . AsyncLoading . Add ( 1 )
go func ( ) {
connectedLoading . Wait ( )
listenLoading . Wait ( )
connectingLoading . Wait ( )
bind ( )
tcpip . AsyncLoading . Done ( )
} ( )
case StateClose :
if e . isPortReserved {
tcpip . AsyncLoading . Add ( 1 )
go func ( ) {
connectedLoading . Wait ( )
listenLoading . Wait ( )
connectingLoading . Wait ( )
bind ( )
e . state = StateClose
tcpip . AsyncLoading . Done ( )
} ( )
}
2019-11-07 17:45:26 +00:00
e . state = StateClose
e . stack . CompleteTransportEndpointCleanup ( e )
tcpip . DeleteDanglingEndpoint ( e )
2019-08-15 18:12:28 +00:00
case StateError :
2019-11-07 17:45:26 +00:00
e . state = StateError
2019-10-29 23:13:43 +00:00
e . stack . CompleteTransportEndpointCleanup ( e )
2019-08-15 18:12:28 +00:00
tcpip . DeleteDanglingEndpoint ( e )
}
}
2018-05-11 23:27:50 +00:00
// saveLastError is invoked by stateify.
//
// It serializes e.lastError as its message string; the empty string stands
// for "no error".
func (e *endpoint) saveLastError() string {
	if err := e.lastError; err != nil {
		return err.String()
	}
	return ""
}
// loadLastError is invoked by stateify.
//
// A non-empty message is resolved through loadError and stored in
// e.lastError; an empty message leaves e.lastError as it is.
func (e *endpoint) loadLastError(s string) {
	if s != "" {
		e.lastError = loadError(s)
	}
}
// saveHardError is invoked by stateify.
2019-10-10 00:54:51 +00:00
func ( e * EndpointInfo ) saveHardError ( ) string {
if e . HardError == nil {
2018-05-11 23:27:50 +00:00
return ""
}
2019-10-10 00:54:51 +00:00
return e . HardError . String ( )
2018-05-11 23:27:50 +00:00
}
// loadHardError is invoked by stateify.
2019-10-10 00:54:51 +00:00
func ( e * EndpointInfo ) loadHardError ( s string ) {
2018-05-11 23:27:50 +00:00
if s == "" {
return
}
2019-10-10 00:54:51 +00:00
e . HardError = loadError ( s )
2018-05-11 23:27:50 +00:00
}
var messageToError map [ string ] * tcpip . Error
var populate sync . Once
func loadError ( s string ) * tcpip . Error {
populate . Do ( func ( ) {
var errors = [ ] * tcpip . Error {
tcpip . ErrUnknownProtocol ,
tcpip . ErrUnknownNICID ,
2019-03-08 23:48:16 +00:00
tcpip . ErrUnknownDevice ,
2018-05-11 23:27:50 +00:00
tcpip . ErrUnknownProtocolOption ,
tcpip . ErrDuplicateNICID ,
tcpip . ErrDuplicateAddress ,
tcpip . ErrNoRoute ,
tcpip . ErrBadLinkEndpoint ,
tcpip . ErrAlreadyBound ,
tcpip . ErrInvalidEndpointState ,
tcpip . ErrAlreadyConnecting ,
tcpip . ErrAlreadyConnected ,
tcpip . ErrNoPortAvailable ,
tcpip . ErrPortInUse ,
tcpip . ErrBadLocalAddress ,
tcpip . ErrClosedForSend ,
tcpip . ErrClosedForReceive ,
tcpip . ErrWouldBlock ,
tcpip . ErrConnectionRefused ,
tcpip . ErrTimeout ,
tcpip . ErrAborted ,
tcpip . ErrConnectStarted ,
tcpip . ErrDestinationRequired ,
tcpip . ErrNotSupported ,
tcpip . ErrQueueSizeNotSupported ,
tcpip . ErrNotConnected ,
tcpip . ErrConnectionReset ,
tcpip . ErrConnectionAborted ,
tcpip . ErrNoSuchFile ,
tcpip . ErrInvalidOptionValue ,
tcpip . ErrNoLinkAddress ,
tcpip . ErrBadAddress ,
2018-05-17 19:49:16 +00:00
tcpip . ErrNetworkUnreachable ,
2018-10-10 21:09:24 +00:00
tcpip . ErrMessageTooLong ,
tcpip . ErrNoBufferSpace ,
2019-02-20 20:53:07 +00:00
tcpip . ErrBroadcastDisabled ,
2019-04-26 23:50:35 +00:00
tcpip . ErrNotPermitted ,
2019-07-03 20:57:24 +00:00
tcpip . ErrAddressFamilyNotSupported ,
2018-05-11 23:27:50 +00:00
}
messageToError = make ( map [ string ] * tcpip . Error )
for _ , e := range errors {
if messageToError [ e . String ( ) ] != nil {
panic ( "tcpip errors with duplicated message: " + e . String ( ) )
}
messageToError [ e . String ( ) ] = e
}
} )
e , ok := messageToError [ s ]
if ! ok {
panic ( "unknown error message: " + s )
}
return e
}
2019-06-14 05:26:59 +00:00
// saveMeasureTime is invoked by stateify.
//
// It splits r.measureTime into whole seconds since the Unix epoch and the
// nanosecond offset within that second (0..999999999), which is the
// (sec, nsec) pair that time.Unix expects when the value is loaded.
//
// The nanosecond component must not be Time.UnixNano(): that is the full
// nanosecond count since the epoch, and time.Unix(sec, nsec) adds both
// arguments together, so the restored instant would land at roughly twice
// the original offset from the epoch.
func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime {
	return unixTime{r.measureTime.Unix(), int64(r.measureTime.Nanosecond())}
}
// loadMeasureTime is invoked by stateify.
//
// It rebuilds r.measureTime from the saved pair by handing unix.second and
// unix.nano to time.Unix as its sec and nsec arguments.
func (r *rcvBufAutoTuneParams) loadMeasureTime(unix unixTime) {
	sec, nsec := unix.second, unix.nano
	r.measureTime = time.Unix(sec, nsec)
}
// saveRttMeasureTime is invoked by stateify.
//
// It splits r.rttMeasureTime into whole seconds since the Unix epoch and the
// nanosecond offset within that second (0..999999999), which is the
// (sec, nsec) pair that time.Unix expects when the value is loaded.
//
// The nanosecond component must not be Time.UnixNano(): that is the full
// nanosecond count since the epoch, and time.Unix(sec, nsec) adds both
// arguments together, so the restored instant would land at roughly twice
// the original offset from the epoch.
func (r *rcvBufAutoTuneParams) saveRttMeasureTime() unixTime {
	return unixTime{r.rttMeasureTime.Unix(), int64(r.rttMeasureTime.Nanosecond())}
}
// loadRttMeasureTime is invoked by stateify.
//
// It rebuilds r.rttMeasureTime from the saved pair by handing unix.second and
// unix.nano to time.Unix as its sec and nsec arguments.
func (r *rcvBufAutoTuneParams) loadRttMeasureTime(unix unixTime) {
	sec, nsec := unix.second, unix.nano
	r.rttMeasureTime = time.Unix(sec, nsec)
}