2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-07-09 21:03:03 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
package stack
|
|
|
|
|
|
|
|
import (
|
2019-04-02 18:12:29 +00:00
|
|
|
"fmt"
|
2018-12-28 19:26:01 +00:00
|
|
|
"math/rand"
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-01-10 06:00:42 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sync"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/hash/jenkins"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
2020-06-11 06:48:03 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/ports"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type protocolIDs struct {
|
|
|
|
network tcpip.NetworkProtocolNumber
|
|
|
|
transport tcpip.TransportProtocolNumber
|
|
|
|
}
|
|
|
|
|
|
|
|
// transportEndpoints manages all endpoints of a given protocol. It has its own
|
|
|
|
// mutex so as to reduce interference between protocols.
|
|
|
|
type transportEndpoints struct {
|
2019-02-27 22:30:20 +00:00
|
|
|
// mu protects all fields of the transportEndpoints.
|
2018-04-27 17:37:02 +00:00
|
|
|
mu sync.RWMutex
|
2020-03-26 15:46:33 +00:00
|
|
|
endpoints map[TransportEndpointID]*endpointsByNIC
|
2019-02-27 22:30:20 +00:00
|
|
|
// rawEndpoints contains endpoints for raw sockets, which receive all
|
|
|
|
// traffic of a given protocol regardless of port.
|
2019-04-02 18:12:29 +00:00
|
|
|
rawEndpoints []RawTransportEndpoint
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2019-10-29 23:13:43 +00:00
|
|
|
// unregisterEndpoint unregisters the endpoint with the given id such that it
|
|
|
|
// won't receive any more packets.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (eps *transportEndpoints) unregisterEndpoint(id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
|
2019-10-29 23:13:43 +00:00
|
|
|
eps.mu.Lock()
|
|
|
|
defer eps.mu.Unlock()
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC, ok := eps.endpoints[id]
|
2019-10-29 23:13:43 +00:00
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
2020-06-11 06:48:03 +00:00
|
|
|
if !epsByNIC.unregisterEndpoint(bindToDevice, ep, flags) {
|
2019-10-29 23:13:43 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
delete(eps.endpoints, id)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eps *transportEndpoints) transportEndpoints() []TransportEndpoint {
|
|
|
|
eps.mu.RLock()
|
|
|
|
defer eps.mu.RUnlock()
|
|
|
|
es := make([]TransportEndpoint, 0, len(eps.endpoints))
|
|
|
|
for _, e := range eps.endpoints {
|
|
|
|
es = append(es, e.transportEndpoints()...)
|
|
|
|
}
|
|
|
|
return es
|
|
|
|
}
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
// iterEndpointsLocked yields all endpointsByNIC in eps that match id, in
|
|
|
|
// descending order of match quality. If a call to yield returns false,
|
|
|
|
// iterEndpointsLocked stops iteration and returns immediately.
|
|
|
|
//
|
|
|
|
// Preconditions: eps.mu must be locked.
|
|
|
|
func (eps *transportEndpoints) iterEndpointsLocked(id TransportEndpointID, yield func(*endpointsByNIC) bool) {
|
|
|
|
// Try to find a match with the id as provided.
|
|
|
|
if ep, ok := eps.endpoints[id]; ok {
|
|
|
|
if !yield(ep) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to find a match with the id minus the local address.
|
|
|
|
nid := id
|
|
|
|
|
|
|
|
nid.LocalAddress = ""
|
|
|
|
if ep, ok := eps.endpoints[nid]; ok {
|
|
|
|
if !yield(ep) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to find a match with the id minus the remote part.
|
|
|
|
nid.LocalAddress = id.LocalAddress
|
|
|
|
nid.RemoteAddress = ""
|
|
|
|
nid.RemotePort = 0
|
|
|
|
if ep, ok := eps.endpoints[nid]; ok {
|
|
|
|
if !yield(ep) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to find a match with only the local port.
|
|
|
|
nid.LocalAddress = ""
|
|
|
|
if ep, ok := eps.endpoints[nid]; ok {
|
|
|
|
if !yield(ep) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// findAllEndpointsLocked returns all endpointsByNIC in eps that match id, in
|
|
|
|
// descending order of match quality.
|
|
|
|
//
|
|
|
|
// Preconditions: eps.mu must be locked.
|
|
|
|
func (eps *transportEndpoints) findAllEndpointsLocked(id TransportEndpointID) []*endpointsByNIC {
|
|
|
|
var matchedEPs []*endpointsByNIC
|
|
|
|
eps.iterEndpointsLocked(id, func(ep *endpointsByNIC) bool {
|
|
|
|
matchedEPs = append(matchedEPs, ep)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return matchedEPs
|
|
|
|
}
|
|
|
|
|
|
|
|
// findEndpointLocked returns the endpoint that most closely matches the given id.
|
|
|
|
//
|
|
|
|
// Preconditions: eps.mu must be locked.
|
|
|
|
func (eps *transportEndpoints) findEndpointLocked(id TransportEndpointID) *endpointsByNIC {
|
|
|
|
var matchedEP *endpointsByNIC
|
|
|
|
eps.iterEndpointsLocked(id, func(ep *endpointsByNIC) bool {
|
|
|
|
matchedEP = ep
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
return matchedEP
|
|
|
|
}
|
|
|
|
|
|
|
|
type endpointsByNIC struct {
|
2019-09-27 21:12:35 +00:00
|
|
|
mu sync.RWMutex
|
|
|
|
endpoints map[tcpip.NICID]*multiPortEndpoint
|
|
|
|
// seed is a random secret for a jenkins hash.
|
|
|
|
seed uint32
|
|
|
|
}
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) transportEndpoints() []TransportEndpoint {
|
|
|
|
epsByNIC.mu.RLock()
|
|
|
|
defer epsByNIC.mu.RUnlock()
|
2019-10-29 23:13:43 +00:00
|
|
|
var eps []TransportEndpoint
|
2020-03-26 15:46:33 +00:00
|
|
|
for _, ep := range epsByNIC.endpoints {
|
2019-10-29 23:13:43 +00:00
|
|
|
eps = append(eps, ep.transportEndpoints()...)
|
|
|
|
}
|
|
|
|
return eps
|
|
|
|
}
|
|
|
|
|
2019-09-27 21:12:35 +00:00
|
|
|
// HandlePacket is called by the stack when new packets arrive to this transport
|
|
|
|
// endpoint.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RLock()
|
2019-09-27 21:12:35 +00:00
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
mpep, ok := epsByNIC.endpoints[r.ref.nic.ID()]
|
2019-09-27 21:12:35 +00:00
|
|
|
if !ok {
|
2020-03-26 15:46:33 +00:00
|
|
|
if mpep, ok = epsByNIC.endpoints[0]; !ok {
|
|
|
|
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
|
2019-09-27 21:12:35 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this is a broadcast or multicast datagram, deliver the datagram to all
|
|
|
|
// endpoints bound to the right device.
|
2020-09-16 19:19:06 +00:00
|
|
|
if isInboundMulticastOrBroadcast(r) {
|
2019-11-06 22:24:38 +00:00
|
|
|
mpep.handlePacketAll(r, id, pkt)
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
|
2019-09-27 21:12:35 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
// multiPortEndpoints are guaranteed to have at least one element.
|
2020-03-26 15:46:33 +00:00
|
|
|
transEP := selectEndpoint(id, mpep, epsByNIC.seed)
|
2020-01-14 22:14:17 +00:00
|
|
|
if queuedProtocol, mustQueue := mpep.demux.queuedProtocols[protocolIDs{mpep.netProto, mpep.transProto}]; mustQueue {
|
|
|
|
queuedProtocol.QueuePacket(r, transEP, id, pkt)
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RUnlock()
|
2020-01-14 22:14:17 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
transEP.HandlePacket(r, id, pkt)
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer) {
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RLock()
|
|
|
|
defer epsByNIC.mu.RUnlock()
|
2019-09-27 21:12:35 +00:00
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
mpep, ok := epsByNIC.endpoints[n.ID()]
|
2019-09-27 21:12:35 +00:00
|
|
|
if !ok {
|
2020-03-26 15:46:33 +00:00
|
|
|
mpep, ok = epsByNIC.endpoints[0]
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(eyalsoha): Why don't we look at id to see if this packet needs to
|
|
|
|
// broadcast like we are doing with handlePacket above?
|
|
|
|
|
|
|
|
// multiPortEndpoints are guaranteed to have at least one element.
|
2020-03-26 15:46:33 +00:00
|
|
|
selectEndpoint(id, mpep, epsByNIC.seed).HandleControlPacket(id, typ, extra, pkt)
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// registerEndpoint returns true if it succeeds. It fails and returns
|
|
|
|
// false if ep already has an element with the same key.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.Lock()
|
|
|
|
defer epsByNIC.mu.Unlock()
|
2019-09-27 21:12:35 +00:00
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
multiPortEp, ok := epsByNIC.endpoints[bindToDevice]
|
2020-03-12 04:12:41 +00:00
|
|
|
if !ok {
|
|
|
|
multiPortEp = &multiPortEndpoint{
|
|
|
|
demux: d,
|
|
|
|
netProto: netProto,
|
|
|
|
transProto: transProto,
|
|
|
|
}
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.endpoints[bindToDevice] = multiPortEp
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
return multiPortEp.singleRegisterEndpoint(t, flags)
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) checkEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
|
|
|
epsByNIC.mu.RLock()
|
|
|
|
defer epsByNIC.mu.RUnlock()
|
|
|
|
|
|
|
|
multiPortEp, ok := epsByNIC.endpoints[bindToDevice]
|
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return multiPortEp.singleCheckEndpoint(flags)
|
|
|
|
}
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
// unregisterEndpoint returns true if endpointsByNIC has to be unregistered.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (epsByNIC *endpointsByNIC) unregisterEndpoint(bindToDevice tcpip.NICID, t TransportEndpoint, flags ports.Flags) bool {
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.Lock()
|
|
|
|
defer epsByNIC.mu.Unlock()
|
|
|
|
multiPortEp, ok := epsByNIC.endpoints[bindToDevice]
|
2019-09-27 21:12:35 +00:00
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
2020-06-11 06:48:03 +00:00
|
|
|
if multiPortEp.unregisterEndpoint(t, flags) {
|
2020-03-26 15:46:33 +00:00
|
|
|
delete(epsByNIC.endpoints, bindToDevice)
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
2020-03-26 15:46:33 +00:00
|
|
|
return len(epsByNIC.endpoints) == 0
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// transportDemuxer demultiplexes packets targeted at a transport endpoint
|
|
|
|
// (i.e., after they've been parsed by the network layer). It does two levels
|
|
|
|
// of demultiplexing: first based on the network and transport protocols, then
|
2019-04-02 18:12:29 +00:00
|
|
|
// based on endpoints IDs. It should only be instantiated via
|
|
|
|
// newTransportDemuxer.
|
2018-04-27 17:37:02 +00:00
|
|
|
type transportDemuxer struct {
|
2019-04-02 18:12:29 +00:00
|
|
|
// protocol is immutable.
|
2020-01-14 22:14:17 +00:00
|
|
|
protocol map[protocolIDs]*transportEndpoints
|
|
|
|
queuedProtocols map[protocolIDs]queuedTransportProtocol
|
|
|
|
}
|
|
|
|
|
|
|
|
// queuedTransportProtocol if supported by a protocol implementation will cause
|
|
|
|
// the dispatcher to delivery packets to the QueuePacket method instead of
|
|
|
|
// calling HandlePacket directly on the endpoint.
|
|
|
|
type queuedTransportProtocol interface {
|
2020-06-03 21:57:57 +00:00
|
|
|
QueuePacket(r *Route, ep TransportEndpoint, id TransportEndpointID, pkt *PacketBuffer)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func newTransportDemuxer(stack *Stack) *transportDemuxer {
|
2020-01-14 22:14:17 +00:00
|
|
|
d := &transportDemuxer{
|
|
|
|
protocol: make(map[protocolIDs]*transportEndpoints),
|
|
|
|
queuedProtocols: make(map[protocolIDs]queuedTransportProtocol),
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Add each network and transport pair to the demuxer.
|
|
|
|
for netProto := range stack.networkProtocols {
|
|
|
|
for proto := range stack.transportProtocols {
|
2020-01-14 22:14:17 +00:00
|
|
|
protoIDs := protocolIDs{netProto, proto}
|
|
|
|
d.protocol[protoIDs] = &transportEndpoints{
|
2020-03-26 15:46:33 +00:00
|
|
|
endpoints: make(map[TransportEndpointID]*endpointsByNIC),
|
2019-02-27 22:30:20 +00:00
|
|
|
}
|
2020-01-14 22:14:17 +00:00
|
|
|
qTransProto, isQueued := (stack.transportProtocols[proto].proto).(queuedTransportProtocol)
|
|
|
|
if isQueued {
|
|
|
|
d.queuedProtocols[protoIDs] = qTransProto
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
|
|
|
// registerEndpoint registers the given endpoint with the dispatcher such that
|
|
|
|
// packets that match the endpoint ID are delivered to it.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
2018-04-27 17:37:02 +00:00
|
|
|
for i, n := range netProtos {
|
2020-06-11 06:48:03 +00:00
|
|
|
if err := d.singleRegisterEndpoint(n, protocol, id, ep, flags, bindToDevice); err != nil {
|
|
|
|
d.unregisterEndpoint(netProtos[:i], protocol, id, ep, flags, bindToDevice)
|
2018-04-27 17:37:02 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
// checkEndpoint checks if an endpoint can be registered with the dispatcher.
|
|
|
|
func (d *transportDemuxer) checkEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
|
|
|
for _, n := range netProtos {
|
|
|
|
if err := d.singleCheckEndpoint(n, protocol, id, flags, bindToDevice); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-12-28 19:26:01 +00:00
|
|
|
// multiPortEndpoint is a container for TransportEndpoints which are bound to
|
2019-09-27 21:12:35 +00:00
|
|
|
// the same pair of address and port. endpointsArr always has at least one
|
|
|
|
// element.
|
2019-10-29 23:13:43 +00:00
|
|
|
//
|
|
|
|
// FIXME(gvisor.dev/issue/873): Restore this properly. Currently, we just save
|
|
|
|
// this to ensure that the underlying endpoints get saved/restored, but not not
|
|
|
|
// use the restored copy.
|
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-12-28 19:26:01 +00:00
|
|
|
type multiPortEndpoint struct {
|
2020-01-14 22:14:17 +00:00
|
|
|
mu sync.RWMutex `state:"nosave"`
|
|
|
|
demux *transportDemuxer
|
|
|
|
netProto tcpip.NetworkProtocolNumber
|
|
|
|
transProto tcpip.TransportProtocolNumber
|
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
// endpoints stores the transport endpoints in the order in which they
|
|
|
|
// were bound. This is required for UDP SO_REUSEADDR.
|
|
|
|
endpoints []TransportEndpoint
|
|
|
|
flags ports.FlagCounter
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2019-10-29 23:13:43 +00:00
|
|
|
func (ep *multiPortEndpoint) transportEndpoints() []TransportEndpoint {
|
|
|
|
ep.mu.RLock()
|
2020-03-12 04:12:41 +00:00
|
|
|
eps := append([]TransportEndpoint(nil), ep.endpoints...)
|
2019-10-29 23:13:43 +00:00
|
|
|
ep.mu.RUnlock()
|
|
|
|
return eps
|
|
|
|
}
|
|
|
|
|
2018-12-28 19:26:01 +00:00
|
|
|
// reciprocalScale maps val onto the range [0, n).
//
// It plays the same role as val % n but is cheaper: the result is the upper
// 32 bits of the 64-bit product val*n.
// See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
func reciprocalScale(val, n uint32) uint32 {
	product := uint64(val) * uint64(n)
	return uint32(product >> 32)
}
|
|
|
|
|
|
|
|
// selectEndpoint calculates a hash of destination and source addresses and
|
|
|
|
// ports then uses it to select a socket. In this case, all packets from one
|
|
|
|
// address will be sent to same endpoint.
|
2019-09-27 21:12:35 +00:00
|
|
|
func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32) TransportEndpoint {
|
2020-03-12 04:12:41 +00:00
|
|
|
if len(mpep.endpoints) == 1 {
|
|
|
|
return mpep.endpoints[0]
|
2019-09-27 21:12:35 +00:00
|
|
|
}
|
2018-12-28 19:26:01 +00:00
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
if mpep.flags.IntersectionRefs().ToFlags().Effective().MostRecent {
|
|
|
|
return mpep.endpoints[len(mpep.endpoints)-1]
|
|
|
|
}
|
|
|
|
|
2018-12-28 19:26:01 +00:00
|
|
|
payload := []byte{
|
|
|
|
byte(id.LocalPort),
|
|
|
|
byte(id.LocalPort >> 8),
|
|
|
|
byte(id.RemotePort),
|
|
|
|
byte(id.RemotePort >> 8),
|
|
|
|
}
|
|
|
|
|
2019-09-27 21:12:35 +00:00
|
|
|
h := jenkins.Sum32(seed)
|
2018-12-28 19:26:01 +00:00
|
|
|
h.Write(payload)
|
|
|
|
h.Write([]byte(id.LocalAddress))
|
|
|
|
h.Write([]byte(id.RemoteAddress))
|
|
|
|
hash := h.Sum32()
|
|
|
|
|
2020-03-12 04:12:41 +00:00
|
|
|
idx := reciprocalScale(hash, uint32(len(mpep.endpoints)))
|
|
|
|
return mpep.endpoints[idx]
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 21:57:57 +00:00
|
|
|
func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
|
2019-09-27 21:12:35 +00:00
|
|
|
ep.mu.RLock()
|
2020-01-14 22:14:17 +00:00
|
|
|
queuedProtocol, mustQueue := ep.demux.queuedProtocols[protocolIDs{ep.netProto, ep.transProto}]
|
2020-03-12 04:12:41 +00:00
|
|
|
// HandlePacket takes ownership of pkt, so each endpoint needs
|
|
|
|
// its own copy except for the final one.
|
|
|
|
for _, endpoint := range ep.endpoints[:len(ep.endpoints)-1] {
|
2020-01-14 22:14:17 +00:00
|
|
|
if mustQueue {
|
|
|
|
queuedProtocol.QueuePacket(r, endpoint, id, pkt.Clone())
|
2020-03-12 04:12:41 +00:00
|
|
|
} else {
|
|
|
|
endpoint.HandlePacket(r, id, pkt.Clone())
|
2020-01-14 22:14:17 +00:00
|
|
|
}
|
2020-03-12 04:12:41 +00:00
|
|
|
}
|
|
|
|
if endpoint := ep.endpoints[len(ep.endpoints)-1]; mustQueue {
|
|
|
|
queuedProtocol.QueuePacket(r, endpoint, id, pkt)
|
|
|
|
} else {
|
|
|
|
endpoint.HandlePacket(r, id, pkt)
|
2019-02-20 20:53:07 +00:00
|
|
|
}
|
2019-09-27 21:12:35 +00:00
|
|
|
ep.mu.RUnlock() // Don't use defer for performance reasons.
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2019-09-27 21:12:35 +00:00
|
|
|
// singleRegisterEndpoint tries to add an endpoint to the multiPortEndpoint
|
|
|
|
// list. The list might be empty already.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags ports.Flags) *tcpip.Error {
|
2018-12-28 19:26:01 +00:00
|
|
|
ep.mu.Lock()
|
|
|
|
defer ep.mu.Unlock()
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
bits := flags.Bits() & ports.MultiBindFlagMask
|
2020-06-11 06:48:03 +00:00
|
|
|
|
2020-03-12 04:12:41 +00:00
|
|
|
if len(ep.endpoints) != 0 {
|
2019-09-27 21:12:35 +00:00
|
|
|
// If it was previously bound, we need to check if we can bind again.
|
2020-06-11 06:48:03 +00:00
|
|
|
if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 {
|
2019-09-27 21:12:35 +00:00
|
|
|
return tcpip.ErrPortInUse
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
ep.endpoints = append(ep.endpoints, t)
|
|
|
|
ep.flags.AddRef(bits)
|
2019-10-30 22:32:20 +00:00
|
|
|
|
2019-09-27 21:12:35 +00:00
|
|
|
return nil
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
func (ep *multiPortEndpoint) singleCheckEndpoint(flags ports.Flags) *tcpip.Error {
|
|
|
|
ep.mu.RLock()
|
|
|
|
defer ep.mu.RUnlock()
|
|
|
|
|
|
|
|
bits := flags.Bits() & ports.MultiBindFlagMask
|
|
|
|
|
|
|
|
if len(ep.endpoints) != 0 {
|
|
|
|
// If it was previously bound, we need to check if we can bind again.
|
|
|
|
if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 {
|
|
|
|
return tcpip.ErrPortInUse
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-12-28 19:26:01 +00:00
|
|
|
// unregisterEndpoint returns true if multiPortEndpoint has to be unregistered.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint, flags ports.Flags) bool {
|
2018-12-28 19:26:01 +00:00
|
|
|
ep.mu.Lock()
|
|
|
|
defer ep.mu.Unlock()
|
|
|
|
|
2020-03-12 04:12:41 +00:00
|
|
|
for i, endpoint := range ep.endpoints {
|
|
|
|
if endpoint == t {
|
2020-06-11 06:48:03 +00:00
|
|
|
copy(ep.endpoints[i:], ep.endpoints[i+1:])
|
|
|
|
ep.endpoints[len(ep.endpoints)-1] = nil
|
|
|
|
ep.endpoints = ep.endpoints[:len(ep.endpoints)-1]
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
ep.flags.DropRef(flags.Bits() & ports.MultiBindFlagMask)
|
2020-03-12 04:12:41 +00:00
|
|
|
break
|
|
|
|
}
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
2020-03-12 04:12:41 +00:00
|
|
|
return len(ep.endpoints) == 0
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
2018-12-28 19:26:01 +00:00
|
|
|
if id.RemotePort != 0 {
|
2020-06-11 06:48:03 +00:00
|
|
|
// SO_REUSEPORT only applies to bound/listening endpoints.
|
|
|
|
flags.LoadBalanced = false
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
eps, ok := d.protocol[protocolIDs{netProto, protocol}]
|
|
|
|
if !ok {
|
2019-09-27 21:12:35 +00:00
|
|
|
return tcpip.ErrUnknownProtocol
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
eps.mu.Lock()
|
|
|
|
defer eps.mu.Unlock()
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC, ok := eps.endpoints[id]
|
2020-03-12 04:12:41 +00:00
|
|
|
if !ok {
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC = &endpointsByNIC{
|
2020-03-12 04:12:41 +00:00
|
|
|
endpoints: make(map[tcpip.NICID]*multiPortEndpoint),
|
|
|
|
seed: rand.Uint32(),
|
|
|
|
}
|
2020-03-26 15:46:33 +00:00
|
|
|
eps.endpoints[id] = epsByNIC
|
2018-12-28 19:26:01 +00:00
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-06-11 06:48:03 +00:00
|
|
|
return epsByNIC.registerEndpoint(d, netProto, protocol, ep, flags, bindToDevice)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 02:14:05 +00:00
|
|
|
func (d *transportDemuxer) singleCheckEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error {
|
|
|
|
if id.RemotePort != 0 {
|
|
|
|
// SO_REUSEPORT only applies to bound/listening endpoints.
|
|
|
|
flags.LoadBalanced = false
|
|
|
|
}
|
|
|
|
|
|
|
|
eps, ok := d.protocol[protocolIDs{netProto, protocol}]
|
|
|
|
if !ok {
|
|
|
|
return tcpip.ErrUnknownProtocol
|
|
|
|
}
|
|
|
|
|
|
|
|
eps.mu.RLock()
|
|
|
|
defer eps.mu.RUnlock()
|
|
|
|
|
|
|
|
epsByNIC, ok := eps.endpoints[id]
|
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return epsByNIC.checkEndpoint(d, netProto, protocol, flags, bindToDevice)
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// unregisterEndpoint unregisters the endpoint with the given id such that it
|
|
|
|
// won't receive any more packets.
|
2020-06-11 06:48:03 +00:00
|
|
|
func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
|
|
|
|
if id.RemotePort != 0 {
|
|
|
|
// SO_REUSEPORT only applies to bound/listening endpoints.
|
|
|
|
flags.LoadBalanced = false
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
for _, n := range netProtos {
|
|
|
|
if eps, ok := d.protocol[protocolIDs{n, protocol}]; ok {
|
2020-06-11 06:48:03 +00:00
|
|
|
eps.unregisterEndpoint(id, ep, flags, bindToDevice)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-20 20:53:07 +00:00
|
|
|
// deliverPacket attempts to find one or more matching transport endpoints, and
|
2019-11-12 23:48:34 +00:00
|
|
|
// then, if matches are found, delivers the packet to them. Returns true if
|
|
|
|
// the packet no longer needs to be handled.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer, id TransportEndpointID) bool {
|
2018-04-27 17:37:02 +00:00
|
|
|
eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-11-12 23:48:34 +00:00
|
|
|
// If the packet is a UDP broadcast or multicast, then find all matching
|
2020-03-13 19:20:09 +00:00
|
|
|
// transport endpoints.
|
2020-09-16 19:19:06 +00:00
|
|
|
if protocol == header.UDPProtocolNumber && isInboundMulticastOrBroadcast(r) {
|
2020-03-13 19:20:09 +00:00
|
|
|
eps.mu.RLock()
|
2020-03-26 15:46:33 +00:00
|
|
|
destEPs := eps.findAllEndpointsLocked(id)
|
2020-03-13 19:20:09 +00:00
|
|
|
eps.mu.RUnlock()
|
|
|
|
// Fail if we didn't find at least one matching transport endpoint.
|
|
|
|
if len(destEPs) == 0 {
|
|
|
|
r.Stats().UDP.UnknownPortErrors.Increment()
|
|
|
|
return false
|
2019-11-12 23:48:34 +00:00
|
|
|
}
|
2020-03-13 19:20:09 +00:00
|
|
|
// handlePacket takes ownership of pkt, so each endpoint needs its own
|
|
|
|
// copy except for the final one.
|
|
|
|
for _, ep := range destEPs[:len(destEPs)-1] {
|
|
|
|
ep.handlePacket(r, id, pkt.Clone())
|
2019-11-12 23:48:34 +00:00
|
|
|
}
|
2020-03-13 19:20:09 +00:00
|
|
|
destEPs[len(destEPs)-1].handlePacket(r, id, pkt)
|
|
|
|
return true
|
|
|
|
}
|
2019-11-12 23:48:34 +00:00
|
|
|
|
2020-03-13 19:20:09 +00:00
|
|
|
// If the packet is a TCP packet with a non-unicast source or destination
|
|
|
|
// address, then do nothing further and instruct the caller to do the same.
|
2020-09-16 19:19:06 +00:00
|
|
|
if protocol == header.TCPProtocolNumber && (!isInboundUnicast(r) || !isOutboundUnicast(r)) {
|
2020-03-13 19:20:09 +00:00
|
|
|
// TCP can only be used to communicate between a single source and a
|
|
|
|
// single destination; the addresses must be unicast.
|
|
|
|
r.Stats().TCP.InvalidSegmentsReceived.Increment()
|
|
|
|
return true
|
2019-02-20 20:53:07 +00:00
|
|
|
}
|
2019-02-27 22:30:20 +00:00
|
|
|
|
2020-03-13 19:20:09 +00:00
|
|
|
eps.mu.RLock()
|
2020-03-26 15:46:33 +00:00
|
|
|
ep := eps.findEndpointLocked(id)
|
2018-04-27 17:37:02 +00:00
|
|
|
eps.mu.RUnlock()
|
2020-03-13 19:20:09 +00:00
|
|
|
if ep == nil {
|
2018-08-27 22:28:38 +00:00
|
|
|
if protocol == header.UDPProtocolNumber {
|
|
|
|
r.Stats().UDP.UnknownPortErrors.Increment()
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
return false
|
|
|
|
}
|
2020-03-13 19:20:09 +00:00
|
|
|
ep.handlePacket(r, id, pkt)
|
2018-04-27 17:37:02 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2019-05-22 20:44:07 +00:00
|
|
|
// deliverRawPacket attempts to deliver the given packet and returns whether it
|
|
|
|
// was delivered successfully.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) bool {
|
2019-05-22 20:44:07 +00:00
|
|
|
eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// As in net/ipv4/ip_input.c:ip_local_deliver, attempt to deliver via
|
|
|
|
// raw endpoint first. If there are multiple raw endpoints, they all
|
|
|
|
// receive the packet.
|
|
|
|
foundRaw := false
|
|
|
|
eps.mu.RLock()
|
|
|
|
for _, rawEP := range eps.rawEndpoints {
|
|
|
|
// Each endpoint gets its own copy of the packet for the sake
|
|
|
|
// of save/restore.
|
2019-11-06 22:24:38 +00:00
|
|
|
rawEP.HandlePacket(r, pkt)
|
2019-05-22 20:44:07 +00:00
|
|
|
foundRaw = true
|
|
|
|
}
|
|
|
|
eps.mu.RUnlock()
|
|
|
|
|
|
|
|
return foundRaw
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// deliverControlPacket attempts to deliver the given control packet. Returns
|
|
|
|
// true if it found an endpoint, false otherwise.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer, id TransportEndpointID) bool {
|
2018-04-27 17:37:02 +00:00
|
|
|
eps, ok := d.protocol[protocolIDs{net, trans}]
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
eps.mu.RLock()
|
2020-03-26 15:46:33 +00:00
|
|
|
ep := eps.findEndpointLocked(id)
|
2018-04-27 17:37:02 +00:00
|
|
|
eps.mu.RUnlock()
|
|
|
|
if ep == nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-11-06 22:24:38 +00:00
|
|
|
ep.handleControlPacket(n, id, typ, extra, pkt)
|
2018-04-27 17:37:02 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2019-11-07 17:45:26 +00:00
|
|
|
// findTransportEndpoint find a single endpoint that most closely matches the provided id.
|
|
|
|
func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, r *Route) TransportEndpoint {
|
|
|
|
eps, ok := d.protocol[protocolIDs{netProto, transProto}]
|
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
2020-03-26 15:46:33 +00:00
|
|
|
|
2019-11-07 17:45:26 +00:00
|
|
|
eps.mu.RLock()
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC := eps.findEndpointLocked(id)
|
|
|
|
if epsByNIC == nil {
|
2019-11-07 17:45:26 +00:00
|
|
|
eps.mu.RUnlock()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
epsByNIC.mu.RLock()
|
2019-11-07 17:45:26 +00:00
|
|
|
eps.mu.RUnlock()
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
mpep, ok := epsByNIC.endpoints[r.ref.nic.ID()]
|
2019-11-07 17:45:26 +00:00
|
|
|
if !ok {
|
2020-03-26 15:46:33 +00:00
|
|
|
if mpep, ok = epsByNIC.endpoints[0]; !ok {
|
|
|
|
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
|
2019-11-07 17:45:26 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-26 15:46:33 +00:00
|
|
|
ep := selectEndpoint(id, mpep, epsByNIC.seed)
|
|
|
|
epsByNIC.mu.RUnlock()
|
2019-11-07 17:45:26 +00:00
|
|
|
return ep
|
|
|
|
}
|
|
|
|
|
2019-02-27 22:30:20 +00:00
|
|
|
// registerRawEndpoint registers the given endpoint with the dispatcher such
|
|
|
|
// that packets of the appropriate protocol are delivered to it. A single
|
|
|
|
// packet can be sent to one or more raw endpoints along with a non-raw
|
|
|
|
// endpoint.
|
2019-04-02 18:12:29 +00:00
|
|
|
func (d *transportDemuxer) registerRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error {
|
|
|
|
eps, ok := d.protocol[protocolIDs{netProto, transProto}]
|
2019-02-27 22:30:20 +00:00
|
|
|
if !ok {
|
2019-10-21 20:21:58 +00:00
|
|
|
return tcpip.ErrNotSupported
|
2019-02-27 22:30:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
eps.mu.Lock()
|
|
|
|
eps.rawEndpoints = append(eps.rawEndpoints, ep)
|
2020-03-12 04:12:41 +00:00
|
|
|
eps.mu.Unlock()
|
2019-02-27 22:30:20 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-04-02 18:12:29 +00:00
|
|
|
// unregisterRawEndpoint unregisters the raw endpoint for the given transport
|
|
|
|
// protocol such that it won't receive any more packets.
|
|
|
|
func (d *transportDemuxer) unregisterRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
|
|
|
|
eps, ok := d.protocol[protocolIDs{netProto, transProto}]
|
|
|
|
if !ok {
|
|
|
|
panic(fmt.Errorf("tried to unregister endpoint with unsupported network and transport protocol pair: %d, %d", netProto, transProto))
|
|
|
|
}
|
|
|
|
|
|
|
|
eps.mu.Lock()
|
|
|
|
for i, rawEP := range eps.rawEndpoints {
|
|
|
|
if rawEP == ep {
|
2020-03-12 04:12:41 +00:00
|
|
|
lastIdx := len(eps.rawEndpoints) - 1
|
|
|
|
eps.rawEndpoints[i] = eps.rawEndpoints[lastIdx]
|
|
|
|
eps.rawEndpoints[lastIdx] = nil
|
|
|
|
eps.rawEndpoints = eps.rawEndpoints[:lastIdx]
|
|
|
|
break
|
2019-02-27 22:30:20 +00:00
|
|
|
}
|
|
|
|
}
|
2020-03-12 04:12:41 +00:00
|
|
|
eps.mu.Unlock()
|
2019-02-27 22:30:20 +00:00
|
|
|
}
|
2019-10-04 02:30:01 +00:00
|
|
|
|
2020-09-16 19:19:06 +00:00
|
|
|
func isInboundMulticastOrBroadcast(r *Route) bool {
|
|
|
|
return r.IsInboundBroadcast() || header.IsV4MulticastAddress(r.LocalAddress) || header.IsV6MulticastAddress(r.LocalAddress)
|
2019-10-04 02:30:01 +00:00
|
|
|
}
|
2019-11-12 23:48:34 +00:00
|
|
|
|
2020-09-16 19:19:06 +00:00
|
|
|
func isInboundUnicast(r *Route) bool {
|
|
|
|
return r.LocalAddress != header.IPv4Any && r.LocalAddress != header.IPv6Any && !isInboundMulticastOrBroadcast(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
func isOutboundUnicast(r *Route) bool {
|
|
|
|
return r.RemoteAddress != header.IPv4Any && r.RemoteAddress != header.IPv6Any && !r.IsOutboundBroadcast() && !header.IsV4MulticastAddress(r.RemoteAddress) && !header.IsV6MulticastAddress(r.RemoteAddress)
|
2019-11-12 23:48:34 +00:00
|
|
|
}
|