2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-07-09 21:03:03 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-09-28 23:22:09 +00:00
|
|
|
// Package ipv4 contains the implementation of the ipv4 network protocol.
|
2018-04-27 17:37:02 +00:00
|
|
|
package ipv4
|
|
|
|
|
|
|
|
import (
|
2020-09-27 02:23:01 +00:00
|
|
|
"fmt"
|
2018-04-27 17:37:02 +00:00
|
|
|
"sync/atomic"
|
2020-10-08 07:54:05 +00:00
|
|
|
"time"
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-09-29 07:18:37 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sync"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
2020-09-18 07:46:26 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header/parse"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/fragmentation"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/hash"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// reassembleTimeout controls how long a fragment may wait for the rest of
	// its datagram before the partial reassembly is dropped.
	//
	// As per RFC 791 section 3.2:
	//   The current recommendation for the initial timer setting is 15 seconds.
	//   This may be changed as experience with this protocol accumulates.
	//
	// Considering that it is an old recommendation, we use the same reassembly
	// timeout that linux defines, which is 30 seconds:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
	reassembleTimeout = 30 * time.Second

	// ProtocolNumber is the ipv4 protocol number.
	ProtocolNumber = header.IPv4ProtocolNumber

	// MaxTotalSize is maximum size that can be encoded in the 16-bit
	// TotalLength field of the ipv4 header.
	MaxTotalSize = 0xffff

	// DefaultTTL is the default time-to-live value for this endpoint.
	DefaultTTL = 64

	// buckets is the number of identifier buckets used to spread IP ID
	// generation across independent atomic counters.
	buckets = 2048

	// The size of a fragment block, in bytes, as per RFC 791 section 3.1,
	// page 14. Fragment offsets are expressed in units of this size.
	fragmentblockSize = 8
)
|
|
|
|
|
2020-09-29 07:18:37 +00:00
|
|
|
// ipv4BroadcastAddr is the IPv4 broadcast address with its full prefix; it is
// installed on every enabled endpoint so broadcast packets are received.
var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix()

// Compile-time assertions that *endpoint implements the required stack
// interfaces.
var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
var _ stack.AddressableEndpoint = (*endpoint)(nil)
var _ stack.NetworkEndpoint = (*endpoint)(nil)
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// endpoint is the IPv4 implementation of stack.NetworkEndpoint; one exists
// per network interface.
type endpoint struct {
	// nic is the network interface this endpoint is attached to.
	nic stack.NetworkInterface

	// dispatcher delivers received transport-layer packets up the stack.
	dispatcher stack.TransportDispatcher

	// protocol is the shared IPv4 protocol state (IP ID counters,
	// fragmentation state, owning stack).
	protocol *protocol

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	//
	// Must be accessed using atomic operations.
	enabled uint32

	// mu guards the fields embedded below.
	mu struct {
		sync.RWMutex

		// addressableEndpointState holds the addresses and group
		// memberships assigned to this endpoint.
		addressableEndpointState stack.AddressableEndpointState
	}
}
|
|
|
|
|
2018-10-31 15:04:05 +00:00
|
|
|
// NewEndpoint creates a new ipv4 endpoint bound to the given NIC, delivering
// inbound transport packets through dispatcher. The LinkAddressCache and
// NUDHandler parameters are unused by IPv4.
func (p *protocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
	e := &endpoint{
		nic:        nic,
		dispatcher: dispatcher,
		protocol:   p,
	}
	// The address state needs a back-reference to the endpoint it serves.
	e.mu.addressableEndpointState.Init(e)
	return e
}
|
|
|
|
|
|
|
|
// Enable implements stack.NetworkEndpoint.
//
// Enabling installs the broadcast address and joins the all-systems multicast
// group. Returns tcpip.ErrNotPermitted if the NIC itself is disabled.
func (e *endpoint) Enable() *tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	// If the NIC is not enabled, the endpoint can't do anything meaningful so
	// don't enable the endpoint.
	if !e.nic.Enabled() {
		return tcpip.ErrNotPermitted
	}

	// If the endpoint is already enabled, there is nothing for it to do.
	if !e.setEnabled(true) {
		return nil
	}

	// Create an endpoint to receive broadcast packets on this interface.
	ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.NeverPrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */)
	if err != nil {
		return err
	}
	// We have no need for the address endpoint.
	ep.DecRef()

	// As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
	// multicast group. Note, the IANA calls the all-hosts multicast group the
	// all-systems multicast group.
	_, err = e.mu.addressableEndpointState.JoinGroup(header.IPv4AllSystems)
	return err
}
|
|
|
|
|
|
|
|
// Enabled implements stack.NetworkEndpoint.
//
// The endpoint is only usable when both the NIC and the endpoint itself are
// enabled.
func (e *endpoint) Enabled() bool {
	return e.nic.Enabled() && e.isEnabled()
}
|
|
|
|
|
|
|
|
// isEnabled returns true if the endpoint is enabled, regardless of the
// enabled status of the NIC.
func (e *endpoint) isEnabled() bool {
	// Read atomically: Enable/Disable may flip this flag concurrently.
	return atomic.LoadUint32(&e.enabled) == 1
}
|
|
|
|
|
|
|
|
// setEnabled sets the enabled status for the endpoint.
|
|
|
|
//
|
|
|
|
// Returns true if the enabled status was updated.
|
|
|
|
func (e *endpoint) setEnabled(v bool) bool {
|
|
|
|
if v {
|
|
|
|
return atomic.SwapUint32(&e.enabled, 1) == 0
|
|
|
|
}
|
|
|
|
return atomic.SwapUint32(&e.enabled, 0) == 1
|
|
|
|
}
|
|
|
|
|
|
|
|
// Disable implements stack.NetworkEndpoint.
func (e *endpoint) Disable() {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.disableLocked()
}
|
|
|
|
|
|
|
|
// disableLocked disables the endpoint: it leaves the all-systems multicast
// group and removes the broadcast address installed by Enable. It is a no-op
// if the endpoint is already disabled.
//
// Precondition: e.mu must be write-locked.
func (e *endpoint) disableLocked() {
	if !e.setEnabled(false) {
		return
	}

	// The endpoint may have already left the multicast group.
	if _, err := e.mu.addressableEndpointState.LeaveGroup(header.IPv4AllSystems); err != nil && err != tcpip.ErrBadLocalAddress {
		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err))
	}

	// The address may have already been removed.
	if err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err != nil && err != tcpip.ErrBadLocalAddress {
		panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err))
	}
}
|
|
|
|
|
2018-09-13 03:38:27 +00:00
|
|
|
// DefaultTTL is the default time-to-live value for this endpoint.
//
// The value is delegated to the protocol so all IPv4 endpoints share the
// stack-wide configured default.
func (e *endpoint) DefaultTTL() uint8 {
	return e.protocol.DefaultTTL()
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
|
|
|
|
// the network layer max header length.
|
|
|
|
func (e *endpoint) MTU() uint32 {
|
2020-10-22 05:10:13 +00:00
|
|
|
networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize)
|
|
|
|
if err != nil {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return networkMTU
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// MaxHeaderLength returns the maximum length needed by ipv4 headers (and
// underlying protocols).
//
// Room is reserved for a maximum-size IPv4 header (i.e. including options)
// on top of whatever the link layer requires.
func (e *endpoint) MaxHeaderLength() uint16 {
	return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize
}
|
|
|
|
|
2020-04-30 23:39:18 +00:00
|
|
|
// NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber.
func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
	return e.protocol.Number()
}
|
|
|
|
|
2020-08-13 20:07:03 +00:00
|
|
|
// addIPHeader pushes a minimum-size IPv4 header onto pkt for the given route
// and header parameters, computes its checksum, and tags the packet with the
// IPv4 protocol number.
func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
	ip := header.IPv4(pkt.NetworkHeader().Push(header.IPv4MinimumSize))
	// TotalLength covers the header just pushed plus the payload.
	length := uint16(pkt.Size())
	// RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic
	// datagrams. Since the DF bit is never being set here, all datagrams
	// are non-atomic and need an ID.
	id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1)
	ip.Encode(&header.IPv4Fields{
		IHL:         header.IPv4MinimumSize,
		TotalLength: length,
		ID:          uint16(id),
		TTL:         params.TTL,
		TOS:         params.TOS,
		Protocol:    uint8(params.Protocol),
		SrcAddr:     r.LocalAddress,
		DstAddr:     r.RemoteAddress,
	})
	ip.SetChecksum(^ip.CalculateChecksum())
	pkt.NetworkProtocolNumber = ProtocolNumber
}
|
|
|
|
|
2020-10-16 18:55:31 +00:00
|
|
|
// handleFragments fragments pkt and calls the handler function on each
// fragment. It returns the number of fragments handled and the number of
// fragments left to be processed. The IP header must already be present in the
// original packet.
func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
	// Round the MTU down to align to 8 bytes: fragment offsets are expressed
	// in 8-byte blocks.
	fragmentPayloadSize := networkMTU &^ 7
	networkHeader := header.IPv4(pkt.NetworkHeader().View())
	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader))

	var n int
	for {
		fragPkt, more := buildNextFragment(&pf, networkHeader)
		if err := handler(fragPkt); err != nil {
			// The fragment that just failed counts as unprocessed, hence +1.
			return n, pf.RemainingFragmentCount() + 1, err
		}
		n++
		if !more {
			return n, pf.RemainingFragmentCount(), nil
		}
	}
}
|
|
|
|
|
2019-10-22 18:54:14 +00:00
|
|
|
// WritePacket writes a packet to the given destination address and protocol.
// It prepends the IPv4 header and delegates the actual send (iptables,
// looping, fragmentation) to writePacket.
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
	e.addIPHeader(r, pkt, params)
	return e.writePacket(r, gso, pkt)
}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-10-16 17:40:35 +00:00
|
|
|
// writePacket sends a packet that already carries its IPv4 header: it runs
// the iptables Output hook, handles NAT redirection and local looping, then
// fragments (if needed) and hands the packet to the NIC.
func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer) *tcpip.Error {
	// iptables filtering. All packets that reach here are locally
	// generated.
	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	ipt := e.protocol.stack.IPTables()
	if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
		// iptables is telling us to drop the packet.
		r.Stats().IP.IPTablesOutputDropped.Increment()
		return nil
	}

	// If the packet is manipulated as per NAT Output rules, handle packet
	// based on destination address and do not send the packet to link
	// layer.
	//
	// TODO(gvisor.dev/issue/170): We should do this for every
	// packet, rather than only NATted packets, but removing this check
	// short circuits broadcasts before they are sent out to other hosts.
	if pkt.NatDone {
		netHeader := header.IPv4(pkt.NetworkHeader().View())
		ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress())
		if err == nil {
			// The NATed destination is local: deliver the packet directly.
			route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
			ep.HandlePacket(&route, pkt)
			return nil
		}
	}

	if r.Loop&stack.PacketLoop != 0 {
		// Loop the packet back to ourselves in addition to (possibly)
		// sending it out below.
		loopedR := r.MakeLoopedRoute()
		e.HandlePacket(&loopedR, pkt)
		loopedR.Release()
	}
	if r.Loop&stack.PacketOut == 0 {
		return nil
	}

	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
	if err != nil {
		r.Stats().IP.OutgoingPacketErrors.Increment()
		return err
	}

	if packetMustBeFragmented(pkt, networkMTU, gso) {
		sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
			// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
			// fragment one by one using WritePacket() (current strategy) or if we
			// want to create a PacketBufferList from the fragments and feed it to
			// WritePackets(). It'll be faster but cost more memory.
			return e.nic.WritePacket(r, gso, ProtocolNumber, fragPkt)
		})
		r.Stats().IP.PacketsSent.IncrementBy(uint64(sent))
		r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(remain))
		return err
	}

	if err := e.nic.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
		r.Stats().IP.OutgoingPacketErrors.Increment()
		return err
	}
	r.Stats().IP.PacketsSent.Increment()
	return nil
}
|
|
|
|
|
2019-10-22 18:54:14 +00:00
|
|
|
// WritePackets implements stack.NetworkEndpoint.WritePackets.
//
// Each packet gets an IPv4 header (and is fragmented in place in the list if
// it exceeds the network MTU), then the whole batch goes through the iptables
// Output hook. Returns the number of packets considered sent, which includes
// packets iptables dropped (drops are not errors to the caller).
func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) {
	if r.Loop&stack.PacketLoop != 0 {
		panic("multiple packets in local loop")
	}
	if r.Loop&stack.PacketOut == 0 {
		return pkts.Len(), nil
	}

	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
		e.addIPHeader(r, pkt, params)
		networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
		if err != nil {
			r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
			return 0, err
		}

		if packetMustBeFragmented(pkt, networkMTU, gso) {
			// Keep track of the packet that is about to be fragmented so it can be
			// removed once the fragmentation is done.
			originalPkt := pkt
			if _, _, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
				// Modify the packet list in place with the new fragments.
				pkts.InsertAfter(pkt, fragPkt)
				pkt = fragPkt
				return nil
			}); err != nil {
				// The handler above never fails, so fragmentation errors are
				// programming errors.
				panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", networkMTU, err))
			}
			// Remove the packet that was just fragmented and process the rest.
			pkts.Remove(originalPkt)
		}
	}

	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	// iptables filtering. All packets that reach here are locally
	// generated.
	ipt := e.protocol.stack.IPTables()
	dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
	if len(dropped) == 0 && len(natPkts) == 0 {
		// Fast path: If no packets are to be dropped then we can just invoke the
		// faster WritePackets API directly.
		n, err := e.nic.WritePackets(r, gso, pkts, ProtocolNumber)
		r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
		if err != nil {
			r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len() - n))
		}
		return n, err
	}
	r.Stats().IP.IPTablesOutputDropped.IncrementBy(uint64(len(dropped)))

	// Slow path as we are dropping some packets in the batch degrade to
	// emitting one packet at a time.
	n := 0
	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
		if _, ok := dropped[pkt]; ok {
			continue
		}
		if _, ok := natPkts[pkt]; ok {
			// NATed to a local destination: deliver directly instead of
			// sending to the link layer.
			netHeader := header.IPv4(pkt.NetworkHeader().View())
			if ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress()); err == nil {
				src := netHeader.SourceAddress()
				dst := netHeader.DestinationAddress()
				route := r.ReverseRoute(src, dst)
				ep.HandlePacket(&route, pkt)
				n++
				continue
			}
		}
		if err := e.nic.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
			r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
			r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len() - n - len(dropped)))
			// Dropped packets aren't errors, so include them in
			// the return value.
			return n + len(dropped), err
		}
		n++
	}
	r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
	// Dropped packets aren't errors, so include them in the return value.
	return n + len(dropped), nil
}
|
|
|
|
|
2020-10-16 17:40:35 +00:00
|
|
|
// WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
//
// The caller supplies the IPv4 header in pkt.Data; this fixes up the length,
// addresses, ID and checksum fields, validates the result, and sends it.
// Returns tcpip.ErrMalformedHeader when the supplied header is too short or
// fails validation.
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
	// The packet already has an IP header, but there are a few required
	// checks.
	h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
	if !ok {
		return tcpip.ErrMalformedHeader
	}
	ip := header.IPv4(h)

	// Always set the total length.
	pktSize := pkt.Data.Size()
	ip.SetTotalLength(uint16(pktSize))

	// Set the source address when zero.
	if ip.SourceAddress() == header.IPv4Any {
		ip.SetSourceAddress(r.LocalAddress)
	}

	// Set the destination. If the packet already included a destination, it will
	// be part of the route anyways.
	ip.SetDestinationAddress(r.RemoteAddress)

	// Set the packet ID when zero.
	if ip.ID() == 0 {
		// RFC 6864 section 4.3 mandates uniqueness of ID values for
		// non-atomic datagrams, so assign an ID to all such datagrams
		// according to the definition given in RFC 6864 section 4.
		if ip.Flags()&header.IPv4FlagDontFragment == 0 || ip.Flags()&header.IPv4FlagMoreFragments != 0 || ip.FragmentOffset() > 0 {
			ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)))
		}
	}

	// Always set the checksum.
	ip.SetChecksum(0)
	ip.SetChecksum(^ip.CalculateChecksum())

	// Populate the packet buffer's network header and don't allow an invalid
	// packet to be sent.
	//
	// Note that parsing only makes sure that the packet is well formed as per the
	// wire format. We also want to check if the header's fields are valid before
	// sending the packet.
	if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().View()).IsValid(pktSize) {
		return tcpip.ErrMalformedHeader
	}

	return e.writePacket(r, nil /* gso */, pkt)
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
//
// It validates the header and checksum, applies iptables Input filtering,
// reassembles fragments, and finally dispatches to ICMP handling or the
// transport-layer dispatcher.
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
	if !e.isEnabled() {
		return
	}

	h := header.IPv4(pkt.NetworkHeader().View())
	if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
		r.Stats().IP.MalformedPacketsReceived.Increment()
		return
	}

	// There has been some confusion regarding verifying checksums. We need
	// just look for negative 0 (0xffff) as the checksum, as it's not possible to
	// get positive 0 (0) for the checksum. Some bad implementations could get it
	// when doing entry replacement in the early days of the Internet,
	// however the lore that one needs to check for both persists.
	//
	// RFC 1624 section 1 describes the source of this confusion as:
	//     [the partial recalculation method described in RFC 1071] computes a
	//     result for certain cases that differs from the one obtained from
	//     scratch (one's complement of one's complement sum of the original
	//     fields).
	//
	// However RFC 1624 section 5 clarifies that if using the verification method
	// "recommended by RFC 1071, it does not matter if an intermediate system
	// generated a -0 instead of +0".
	//
	// RFC1071 page 1 specifies the verification method as:
	//	  (3)  To check a checksum, the 1's complement sum is computed over the
	//        same set of octets, including the checksum field.  If the result
	//        is all 1 bits (-0 in 1's complement arithmetic), the check
	//        succeeds.
	if h.CalculateChecksum() != 0xffff {
		r.Stats().IP.MalformedPacketsReceived.Increment()
		return
	}

	// As per RFC 1122 section 3.2.1.3:
	//   When a host sends any datagram, the IP source address MUST
	//   be one of its own IP addresses (but not a broadcast or
	//   multicast address).
	if r.IsOutboundBroadcast() || header.IsV4MulticastAddress(r.RemoteAddress) {
		r.Stats().IP.InvalidSourceAddressesReceived.Increment()
		return
	}

	// iptables filtering. All packets that reach here are intended for
	// this machine and will not be forwarded.
	ipt := e.protocol.stack.IPTables()
	if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
		// iptables is telling us to drop the packet.
		r.Stats().IP.IPTablesInputDropped.Increment()
		return
	}

	if h.More() || h.FragmentOffset() != 0 {
		if pkt.Data.Size()+pkt.TransportHeader().View().Size() == 0 {
			// Drop the packet as it's marked as a fragment but has
			// no payload.
			r.Stats().IP.MalformedPacketsReceived.Increment()
			r.Stats().IP.MalformedFragmentsReceived.Increment()
			return
		}
		// The packet is a fragment, let's try to reassemble it.
		start := h.FragmentOffset()
		// Drop the fragment if the size of the reassembled payload would exceed the
		// maximum payload size.
		//
		// Note that this addition doesn't overflow even on 32bit architecture
		// because pkt.Data.Size() should not exceed 65535 (the max IP datagram
		// size). Otherwise the packet would've been rejected as invalid before
		// reaching here.
		if int(start)+pkt.Data.Size() > header.IPv4MaximumPayloadSize {
			r.Stats().IP.MalformedPacketsReceived.Increment()
			r.Stats().IP.MalformedFragmentsReceived.Increment()
			return
		}
		var ready bool
		var err error
		proto := h.Protocol()
		pkt.Data, _, ready, err = e.protocol.fragmentation.Process(
			// As per RFC 791 section 2.3, the identification value is unique
			// for a source-destination pair and protocol.
			fragmentation.FragmentID{
				Source:      h.SourceAddress(),
				Destination: h.DestinationAddress(),
				ID:          uint32(h.ID()),
				Protocol:    proto,
			},
			start,
			start+uint16(pkt.Data.Size())-1,
			h.More(),
			proto,
			pkt.Data,
		)
		if err != nil {
			r.Stats().IP.MalformedPacketsReceived.Increment()
			r.Stats().IP.MalformedFragmentsReceived.Increment()
			return
		}
		if !ready {
			// More fragments are needed before the datagram can be delivered.
			return
		}
	}

	r.Stats().IP.PacketsDelivered.Increment()
	p := h.TransportProtocol()
	if p == header.ICMPv4ProtocolNumber {
		// TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport
		// headers, the setting of the transport number here should be
		// unnecessary and removed.
		pkt.TransportProtocolNumber = p
		e.handleICMP(r, pkt)
		return
	}

	switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
	case stack.TransportPacketHandled:
	case stack.TransportPacketDestinationPortUnreachable:
		// As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination
		//   Unreachable messages with code:
		//     3 (Port Unreachable), when the designated transport protocol
		//     (e.g., UDP) is unable to demultiplex the datagram but has no
		//     protocol mechanism to inform the sender.
		_ = e.protocol.returnError(r, &icmpReasonPortUnreachable{}, pkt)
	case stack.TransportPacketProtocolUnreachable:
		// As per RFC: 1122 Section 3.2.2.1
		//   A host SHOULD generate Destination Unreachable messages with code:
		//     2 (Protocol Unreachable), when the designated transport protocol
		//     is not supported
		_ = e.protocol.returnError(r, &icmpReasonProtoUnreachable{}, pkt)
	default:
		panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
	}
}
|
|
|
|
|
|
|
|
// Close cleans up resources associated with the endpoint: the endpoint is
// disabled and all its address state is released.
func (e *endpoint) Close() {
	e.mu.Lock()
	defer e.mu.Unlock()

	e.disableLocked()
	e.mu.addressableEndpointState.Cleanup()
}
|
|
|
|
|
|
|
|
// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
//
// The call is delegated to the endpoint's address state under e.mu.
func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
}
|
|
|
|
|
|
|
|
// RemovePermanentAddress implements stack.AddressableEndpoint.
//
// The call is delegated to the endpoint's address state under e.mu.
func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.mu.addressableEndpointState.RemovePermanentAddress(addr)
}
|
|
|
|
|
2020-09-30 02:44:42 +00:00
|
|
|
// MainAddress implements stack.AddressableEndpoint.
func (e *endpoint) MainAddress() tcpip.AddressWithPrefix {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.mu.addressableEndpointState.MainAddress()
}
|
|
|
|
|
2020-09-29 07:18:37 +00:00
|
|
|
// AcquireAssignedAddress implements stack.AddressableEndpoint.
//
// In addition to exact address matches, an address is considered assigned if
// localAddr is the subnet broadcast address of an assigned address, or (on a
// loopback NIC) falls anywhere inside an assigned address's subnet. When no
// match is found and allowTemp is set, a temporary address is created.
func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
	e.mu.Lock()
	defer e.mu.Unlock()

	loopback := e.nic.IsLoopback()
	addressEndpoint := e.mu.addressableEndpointState.ReadOnly().AddrOrMatching(localAddr, allowTemp, func(addressEndpoint stack.AddressEndpoint) bool {
		subnet := addressEndpoint.AddressWithPrefix().Subnet()
		// IPv4 has a notion of a subnet broadcast address and considers the
		// loopback interface bound to an address's whole subnet (on linux).
		return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr))
	})
	if addressEndpoint != nil {
		return addressEndpoint
	}

	if !allowTemp {
		return nil
	}

	addr := localAddr.WithPrefix()
	addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(addr, tempPEB)
	if err != nil {
		// AddAddress only returns an error if the address is already assigned,
		// but we just checked above if the address exists so we expect no error.
		panic(fmt.Sprintf("e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(%s, %d): %s", addr, tempPEB, err))
	}
	return addressEndpoint
}
|
|
|
|
|
2020-09-30 02:44:42 +00:00
|
|
|
// AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint.
|
|
|
|
func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
|
2020-09-29 07:18:37 +00:00
|
|
|
e.mu.RLock()
|
|
|
|
defer e.mu.RUnlock()
|
2020-09-30 02:44:42 +00:00
|
|
|
return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired)
|
2020-09-29 07:18:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// PrimaryAddresses implements stack.AddressableEndpoint.
|
|
|
|
func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
|
|
|
|
e.mu.RLock()
|
|
|
|
defer e.mu.RUnlock()
|
|
|
|
return e.mu.addressableEndpointState.PrimaryAddresses()
|
|
|
|
}
|
|
|
|
|
|
|
|
// PermanentAddresses implements stack.AddressableEndpoint.
|
|
|
|
func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
|
|
|
|
e.mu.RLock()
|
|
|
|
defer e.mu.RUnlock()
|
|
|
|
return e.mu.addressableEndpointState.PermanentAddresses()
|
|
|
|
}
|
|
|
|
|
|
|
|
// JoinGroup implements stack.GroupAddressableEndpoint.
|
|
|
|
func (e *endpoint) JoinGroup(addr tcpip.Address) (bool, *tcpip.Error) {
|
|
|
|
if !header.IsV4MulticastAddress(addr) {
|
|
|
|
return false, tcpip.ErrBadAddress
|
|
|
|
}
|
|
|
|
|
|
|
|
e.mu.Lock()
|
|
|
|
defer e.mu.Unlock()
|
|
|
|
return e.mu.addressableEndpointState.JoinGroup(addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// LeaveGroup implements stack.GroupAddressableEndpoint.
|
|
|
|
func (e *endpoint) LeaveGroup(addr tcpip.Address) (bool, *tcpip.Error) {
|
|
|
|
e.mu.Lock()
|
|
|
|
defer e.mu.Unlock()
|
|
|
|
return e.mu.addressableEndpointState.LeaveGroup(addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsInGroup implements stack.GroupAddressableEndpoint.
|
|
|
|
func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
|
|
|
|
e.mu.RLock()
|
|
|
|
defer e.mu.RUnlock()
|
|
|
|
return e.mu.addressableEndpointState.IsInGroup(addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
var _ stack.NetworkProtocol = (*protocol)(nil)

// protocol implements stack.NetworkProtocol and
// stack.ForwardingNetworkProtocol for IPv4.
type protocol struct {
	// stack is the networking stack this protocol is registered with.
	stack *stack.Stack

	// defaultTTL is the current default TTL for the protocol. Only the
	// uint8 portion of it is meaningful.
	//
	// Must be accessed using atomic operations.
	defaultTTL uint32

	// forwarding is set to 1 when the protocol has forwarding enabled and 0
	// when it is disabled.
	//
	// Must be accessed using atomic operations.
	forwarding uint32

	// ids and hashIV are randomly initialized by NewProtocol.
	// NOTE(review): presumably ids holds per-bucket IP identification
	// counters selected via hashRoute(..., hashIV) — confirm at the
	// call sites outside this chunk.
	ids []uint32
	hashIV uint32

	// fragmentation holds the reassembly state for incoming IPv4
	// fragments.
	fragmentation *fragmentation.Fragmentation
}
|
|
|
|
// Number returns the ipv4 protocol number.
//
// It implements stack.NetworkProtocol.Number.
func (p *protocol) Number() tcpip.NetworkProtocolNumber {
	return ProtocolNumber
}
|
|
|
|
// MinimumPacketSize returns the minimum valid ipv4 packet size, i.e. the
// size of an IPv4 header with no options.
//
// It implements stack.NetworkProtocol.MinimumPacketSize.
func (p *protocol) MinimumPacketSize() int {
	return header.IPv4MinimumSize
}
|
2019-07-24 20:40:52 +00:00
|
|
|
// DefaultPrefixLen returns the IPv4 default prefix length: the full address
// width (32 bits).
//
// It implements stack.NetworkProtocol.DefaultPrefixLen.
func (p *protocol) DefaultPrefixLen() int {
	return header.IPv4AddressSize * 8
}
|
2018-04-27 17:37:02 +00:00
|
|
|
// ParseAddresses implements NetworkProtocol.ParseAddresses.
|
|
|
|
func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
|
|
|
|
h := header.IPv4(v)
|
|
|
|
return h.SourceAddress(), h.DestinationAddress()
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetOption implements NetworkProtocol.SetOption.
|
2020-08-28 18:47:58 +00:00
|
|
|
func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
|
2019-10-08 02:28:26 +00:00
|
|
|
switch v := option.(type) {
|
2020-08-28 18:47:58 +00:00
|
|
|
case *tcpip.DefaultTTLOption:
|
|
|
|
p.SetDefaultTTL(uint8(*v))
|
2019-10-08 02:28:26 +00:00
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return tcpip.ErrUnknownProtocolOption
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Option implements NetworkProtocol.Option.
|
2020-08-28 18:47:58 +00:00
|
|
|
func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
|
2019-10-08 02:28:26 +00:00
|
|
|
switch v := option.(type) {
|
|
|
|
case *tcpip.DefaultTTLOption:
|
|
|
|
*v = tcpip.DefaultTTLOption(p.DefaultTTL())
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return tcpip.ErrUnknownProtocolOption
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetDefaultTTL sets the default TTL for endpoints created with this protocol.
//
// The value is stored atomically so it may be updated concurrently with
// readers calling DefaultTTL.
func (p *protocol) SetDefaultTTL(ttl uint8) {
	atomic.StoreUint32(&p.defaultTTL, uint32(ttl))
}
|
|
|
|
// DefaultTTL returns the default TTL for endpoints created with this protocol.
//
// Only the low 8 bits of the stored value are meaningful; the load is atomic
// to pair with SetDefaultTTL.
func (p *protocol) DefaultTTL() uint8 {
	return uint8(atomic.LoadUint32(&p.defaultTTL))
}
|
2020-02-24 18:31:01 +00:00
|
|
|
// Close implements stack.NetworkProtocol.Close.
//
// NOTE(review): the original comment said stack.TransportProtocol.Close, but
// protocol is asserted to implement stack.NetworkProtocol above; this is the
// network-protocol hook. It is a no-op.
func (*protocol) Close() {}
|
|
|
|
// Wait implements stack.NetworkProtocol.Wait.
//
// NOTE(review): the original comment said stack.TransportProtocol.Wait, but
// protocol is asserted to implement stack.NetworkProtocol above; this is the
// network-protocol hook. It is a no-op.
func (*protocol) Wait() {}
|
2020-09-18 07:46:26 +00:00
|
|
|
// Parse implements stack.NetworkProtocol.Parse.
|
2020-06-07 20:37:25 +00:00
|
|
|
func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
|
2020-09-18 07:46:26 +00:00
|
|
|
if ok := parse.IPv4(pkt); !ok {
|
2020-08-13 20:07:03 +00:00
|
|
|
return 0, false, false
|
2020-06-07 20:37:25 +00:00
|
|
|
}
|
|
|
|
|
2020-09-18 07:46:26 +00:00
|
|
|
ipHdr := header.IPv4(pkt.NetworkHeader().View())
|
|
|
|
return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true
|
2020-06-07 20:37:25 +00:00
|
|
|
}
|
|
|
|
|
2020-09-29 07:18:37 +00:00
|
|
|
// Forwarding implements stack.ForwardingNetworkProtocol.
|
|
|
|
func (p *protocol) Forwarding() bool {
|
|
|
|
return uint8(atomic.LoadUint32(&p.forwarding)) == 1
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetForwarding implements stack.ForwardingNetworkProtocol.
|
|
|
|
func (p *protocol) SetForwarding(v bool) {
|
|
|
|
if v {
|
|
|
|
atomic.StoreUint32(&p.forwarding, 1)
|
|
|
|
} else {
|
|
|
|
atomic.StoreUint32(&p.forwarding, 0)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-22 05:10:13 +00:00
|
|
|
// calculateNetworkMTU calculates the network-layer payload MTU based on the
|
|
|
|
// link-layer payload mtu.
|
|
|
|
func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, *tcpip.Error) {
|
|
|
|
if linkMTU < header.IPv4MinimumMTU {
|
|
|
|
return 0, tcpip.ErrInvalidEndpointState
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2020-10-22 05:10:13 +00:00
|
|
|
// As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in
|
|
|
|
// length:
|
|
|
|
// The maximal internet header is 60 octets, and a typical internet header
|
|
|
|
// is 20 octets, allowing a margin for headers of higher level protocols.
|
|
|
|
if networkHeaderSize > header.IPv4MaximumHeaderSize {
|
|
|
|
return 0, tcpip.ErrMalformedHeader
|
2020-10-06 20:55:02 +00:00
|
|
|
}
|
2020-10-22 05:10:13 +00:00
|
|
|
|
|
|
|
networkMTU := linkMTU
|
|
|
|
if networkMTU > MaxTotalSize {
|
|
|
|
networkMTU = MaxTotalSize
|
|
|
|
}
|
|
|
|
|
|
|
|
return networkMTU - uint32(networkHeaderSize), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
|
|
|
|
payload := pkt.TransportHeader().View().Size() + pkt.Data.Size()
|
|
|
|
return (gso == nil || gso.Type == stack.GSONone) && uint32(payload) > networkMTU
|
2020-10-06 20:55:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// addressToUint32 translates an IPv4 address into its little endian uint32
// representation.
//
// This function does the same thing as binary.LittleEndian.Uint32 but operates
// on a tcpip.Address (a string) without the need to convert it to a byte slice,
// which would cause an allocation.
//
// It panics if addr is shorter than 4 bytes.
func addressToUint32(addr tcpip.Address) uint32 {
	_ = addr[3] // bounds check hint to compiler
	return uint32(addr[0]) | uint32(addr[1])<<8 | uint32(addr[2])<<16 | uint32(addr[3])<<24
}
|
2018-04-27 17:37:02 +00:00
|
|
|
// hashRoute calculates a hash value for the given route. It uses the source &
|
2020-10-06 20:55:02 +00:00
|
|
|
// destination address, the transport protocol number and a 32-bit number to
|
|
|
|
// generate the hash.
|
2019-09-25 19:56:00 +00:00
|
|
|
func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 {
|
2020-10-06 20:55:02 +00:00
|
|
|
a := addressToUint32(r.LocalAddress)
|
|
|
|
b := addressToUint32(r.RemoteAddress)
|
2018-04-27 17:37:02 +00:00
|
|
|
return hash.Hash3Words(a, b, uint32(protocol), hashIV)
|
|
|
|
}
|
|
|
|
|
2019-09-25 19:56:00 +00:00
|
|
|
// NewProtocol returns an IPv4 network protocol.
|
2020-09-29 09:04:11 +00:00
|
|
|
func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
|
2019-09-25 19:56:00 +00:00
|
|
|
ids := make([]uint32, buckets)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Randomly initialize hashIV and the ids.
|
|
|
|
r := hash.RandN32(1 + buckets)
|
|
|
|
for i := range ids {
|
|
|
|
ids[i] = r[i]
|
|
|
|
}
|
2019-09-25 19:56:00 +00:00
|
|
|
hashIV := r[buckets]
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-08-04 23:25:15 +00:00
|
|
|
return &protocol{
|
2020-09-29 09:04:11 +00:00
|
|
|
stack: s,
|
2020-08-04 23:25:15 +00:00
|
|
|
ids: ids,
|
|
|
|
hashIV: hashIV,
|
|
|
|
defaultTTL: DefaultTTL,
|
2020-10-08 07:54:05 +00:00
|
|
|
fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, reassembleTimeout, s.Clock()),
|
2020-08-04 23:25:15 +00:00
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
2020-10-06 20:55:02 +00:00
|
|
|
|
|
|
|
// buildNextFragment builds the next IPv4 fragment of the packet being split
// by pf. It copies originalIPHeader into the fragment and then fixes up the
// fragmentation-related fields: flags, fragment offset, total length and
// header checksum.
//
// It returns the fragment packet and whether more fragments remain.
func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) {
	fragPkt, offset, copied, more := pf.BuildNextFragment()
	fragPkt.NetworkProtocolNumber = ProtocolNumber

	originalIPHeaderLength := len(originalIPHeader)
	nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength))

	if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) {
		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength))
	}

	// Non-final fragments carry the "more fragments" flag; all other flag
	// bits are inherited from the original header.
	flags := originalIPHeader.Flags()
	if more {
		flags |= header.IPv4FlagMoreFragments
	}
	nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset))
	nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied))
	// Zero the checksum field before recomputing it over the updated header.
	nextFragIPHeader.SetChecksum(0)
	nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum())

	return fragPkt, more
}
|