2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-07-09 21:03:03 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Package ipv4 contains the implementation of the ipv4 network protocol. To use
|
|
|
|
// it in the networking stack, this package must be added to the project, and
|
2019-09-25 19:56:00 +00:00
|
|
|
// activated on the stack by passing ipv4.NewProtocol() as one of the network
|
|
|
|
// protocols when calling stack.New(). Then endpoints can be created by passing
|
|
|
|
// ipv4.ProtocolNumber as the network protocol number when calling
|
2018-04-27 17:37:02 +00:00
|
|
|
// Stack.NewEndpoint().
|
|
|
|
package ipv4
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync/atomic"
|
|
|
|
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
2020-09-18 07:46:26 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header/parse"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/fragmentation"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/hash"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
// ProtocolNumber is the ipv4 protocol number.
|
|
|
|
ProtocolNumber = header.IPv4ProtocolNumber
|
|
|
|
|
2019-02-26 22:57:27 +00:00
|
|
|
// MaxTotalSize is maximum size that can be encoded in the 16-bit
|
2018-04-27 17:37:02 +00:00
|
|
|
// TotalLength field of the ipv4 header.
|
2019-02-26 22:57:27 +00:00
|
|
|
MaxTotalSize = 0xffff
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2019-10-08 02:28:26 +00:00
|
|
|
// DefaultTTL is the default time-to-live value for this endpoint.
|
|
|
|
DefaultTTL = 64
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// buckets is the number of identifier buckets.
|
|
|
|
buckets = 2048
|
2020-07-30 21:19:38 +00:00
|
|
|
|
|
|
|
// The size of a fragment block, in bytes, as per RFC 791 section 3.1,
|
|
|
|
// page 14.
|
|
|
|
fragmentblockSize = 8
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type endpoint struct {
|
2020-08-04 23:25:15 +00:00
|
|
|
nicID tcpip.NICID
|
|
|
|
linkEP stack.LinkEndpoint
|
|
|
|
dispatcher stack.TransportDispatcher
|
|
|
|
protocol *protocol
|
|
|
|
stack *stack.Stack
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2018-10-31 15:04:05 +00:00
|
|
|
// NewEndpoint creates a new ipv4 endpoint.
|
2020-08-25 18:07:32 +00:00
|
|
|
func (p *protocol) NewEndpoint(nicID tcpip.NICID, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) stack.NetworkEndpoint {
|
2020-08-15 00:27:23 +00:00
|
|
|
return &endpoint{
|
2020-08-04 23:25:15 +00:00
|
|
|
nicID: nicID,
|
|
|
|
linkEP: linkEP,
|
|
|
|
dispatcher: dispatcher,
|
|
|
|
protocol: p,
|
|
|
|
stack: st,
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-13 03:38:27 +00:00
|
|
|
// DefaultTTL is the default time-to-live value for this endpoint.
|
|
|
|
func (e *endpoint) DefaultTTL() uint8 {
|
2019-10-08 02:28:26 +00:00
|
|
|
return e.protocol.DefaultTTL()
|
2018-09-13 03:38:27 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
|
|
|
|
// the network layer max header length.
|
|
|
|
func (e *endpoint) MTU() uint32 {
|
|
|
|
return calculateMTU(e.linkEP.MTU())
|
|
|
|
}
|
|
|
|
|
|
|
|
// Capabilities implements stack.NetworkEndpoint.Capabilities.
|
|
|
|
func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
|
|
|
|
return e.linkEP.Capabilities()
|
|
|
|
}
|
|
|
|
|
|
|
|
// NICID returns the ID of the NIC this endpoint belongs to.
|
|
|
|
func (e *endpoint) NICID() tcpip.NICID {
|
2019-11-07 03:39:57 +00:00
|
|
|
return e.nicID
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// MaxHeaderLength returns the maximum length needed by ipv4 headers (and
|
|
|
|
// underlying protocols).
|
|
|
|
func (e *endpoint) MaxHeaderLength() uint16 {
|
|
|
|
return e.linkEP.MaxHeaderLength() + header.IPv4MinimumSize
|
|
|
|
}
|
|
|
|
|
2019-03-28 18:02:23 +00:00
|
|
|
// GSOMaxSize returns the maximum GSO packet size.
|
|
|
|
func (e *endpoint) GSOMaxSize() uint32 {
|
|
|
|
if gso, ok := e.linkEP.(stack.GSOEndpoint); ok {
|
|
|
|
return gso.GSOMaxSize()
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
2020-04-30 23:39:18 +00:00
|
|
|
// NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber.
|
|
|
|
func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
|
|
|
|
return e.protocol.Number()
|
|
|
|
}
|
|
|
|
|
2019-05-03 20:29:20 +00:00
|
|
|
// writePacketFragments calls e.linkEP.WritePacket with each packet fragment to
|
2020-08-13 20:07:03 +00:00
|
|
|
// write. It assumes that the IP header is already present in pkt.NetworkHeader.
|
|
|
|
// pkt.TransportHeader may be set. mtu includes the IP header and options. This
|
|
|
|
// does not support the DontFragment IP flag.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int, pkt *stack.PacketBuffer) *tcpip.Error {
|
2019-05-03 20:29:20 +00:00
|
|
|
// This packet is too big, it needs to be fragmented.
|
2020-08-13 20:07:03 +00:00
|
|
|
ip := header.IPv4(pkt.NetworkHeader().View())
|
2019-05-03 20:29:20 +00:00
|
|
|
flags := ip.Flags()
|
|
|
|
|
|
|
|
// Update mtu to take into account the header, which will exist in all
|
|
|
|
// fragments anyway.
|
|
|
|
innerMTU := mtu - int(ip.HeaderLength())
|
|
|
|
|
|
|
|
// Round the MTU down to align to 8 bytes. Then calculate the number of
|
|
|
|
// fragments. Calculate fragment sizes as in RFC791.
|
|
|
|
innerMTU &^= 7
|
|
|
|
n := (int(ip.PayloadLength()) + innerMTU - 1) / innerMTU
|
|
|
|
|
|
|
|
outerMTU := innerMTU + int(ip.HeaderLength())
|
|
|
|
offset := ip.FragmentOffset()
|
2020-08-13 20:07:03 +00:00
|
|
|
|
|
|
|
// Keep the length reserved for link-layer, we need to create fragments with
|
|
|
|
// the same reserved length.
|
|
|
|
reservedForLink := pkt.AvailableHeaderBytes()
|
|
|
|
|
|
|
|
// Destroy the packet, pull all payloads out for fragmentation.
|
|
|
|
transHeader, data := pkt.TransportHeader().View(), pkt.Data
|
|
|
|
|
|
|
|
// Where possible, the first fragment that is sent has the same
|
|
|
|
// number of bytes reserved for header as the input packet. The link-layer
|
|
|
|
// endpoint may depend on this for looking at, eg, L4 headers.
|
|
|
|
transFitsFirst := len(transHeader) <= innerMTU
|
|
|
|
|
2019-05-03 20:29:20 +00:00
|
|
|
for i := 0; i < n; i++ {
|
2020-08-13 20:07:03 +00:00
|
|
|
reserve := reservedForLink + int(ip.HeaderLength())
|
|
|
|
if i == 0 && transFitsFirst {
|
|
|
|
// Reserve for transport header if it's going to be put in the first
|
|
|
|
// fragment.
|
|
|
|
reserve += len(transHeader)
|
|
|
|
}
|
|
|
|
fragPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
|
|
|
|
ReserveHeaderBytes: reserve,
|
|
|
|
})
|
|
|
|
fragPkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
|
|
|
|
|
|
|
|
// Copy data for the fragment.
|
|
|
|
avail := innerMTU
|
|
|
|
|
|
|
|
if n := len(transHeader); n > 0 {
|
|
|
|
if n > avail {
|
|
|
|
n = avail
|
|
|
|
}
|
|
|
|
if i == 0 && transFitsFirst {
|
|
|
|
copy(fragPkt.TransportHeader().Push(n), transHeader)
|
|
|
|
} else {
|
|
|
|
fragPkt.Data.AppendView(transHeader[:n:n])
|
|
|
|
}
|
|
|
|
transHeader = transHeader[n:]
|
|
|
|
avail -= n
|
2019-05-03 20:29:20 +00:00
|
|
|
}
|
2020-08-13 20:07:03 +00:00
|
|
|
|
|
|
|
if avail > 0 {
|
|
|
|
n := data.Size()
|
|
|
|
if n > avail {
|
|
|
|
n = avail
|
|
|
|
}
|
|
|
|
data.ReadToVV(&fragPkt.Data, n)
|
|
|
|
avail -= n
|
|
|
|
}
|
|
|
|
|
|
|
|
copied := uint16(innerMTU - avail)
|
|
|
|
|
|
|
|
// Set lengths in header and calculate checksum.
|
|
|
|
h := header.IPv4(fragPkt.NetworkHeader().Push(len(ip)))
|
|
|
|
copy(h, ip)
|
2019-05-03 20:29:20 +00:00
|
|
|
if i != n-1 {
|
|
|
|
h.SetTotalLength(uint16(outerMTU))
|
|
|
|
h.SetFlagsFragmentOffset(flags|header.IPv4FlagMoreFragments, offset)
|
|
|
|
} else {
|
2020-08-13 20:07:03 +00:00
|
|
|
h.SetTotalLength(uint16(h.HeaderLength()) + copied)
|
2019-05-03 20:29:20 +00:00
|
|
|
h.SetFlagsFragmentOffset(flags, offset)
|
|
|
|
}
|
|
|
|
h.SetChecksum(0)
|
|
|
|
h.SetChecksum(^h.CalculateChecksum())
|
2020-08-13 20:07:03 +00:00
|
|
|
offset += copied
|
|
|
|
|
|
|
|
// Send out the fragment.
|
|
|
|
if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, fragPkt); err != nil {
|
|
|
|
return err
|
2019-05-03 20:29:20 +00:00
|
|
|
}
|
2020-08-13 20:07:03 +00:00
|
|
|
r.Stats().IP.PacketsSent.Increment()
|
2019-05-03 20:29:20 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-08-13 20:07:03 +00:00
|
|
|
func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
|
|
|
|
ip := header.IPv4(pkt.NetworkHeader().Push(header.IPv4MinimumSize))
|
|
|
|
length := uint16(pkt.Size())
|
2020-07-07 23:13:21 +00:00
|
|
|
// RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic
|
|
|
|
// datagrams. Since the DF bit is never being set here, all datagrams
|
|
|
|
// are non-atomic and need an ID.
|
|
|
|
id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1)
|
2018-04-27 17:37:02 +00:00
|
|
|
ip.Encode(&header.IPv4Fields{
|
|
|
|
IHL: header.IPv4MinimumSize,
|
|
|
|
TotalLength: length,
|
|
|
|
ID: uint16(id),
|
2019-10-15 00:45:29 +00:00
|
|
|
TTL: params.TTL,
|
|
|
|
TOS: params.TOS,
|
|
|
|
Protocol: uint8(params.Protocol),
|
2018-10-31 15:04:05 +00:00
|
|
|
SrcAddr: r.LocalAddress,
|
2018-04-27 17:37:02 +00:00
|
|
|
DstAddr: r.RemoteAddress,
|
|
|
|
})
|
|
|
|
ip.SetChecksum(^ip.CalculateChecksum())
|
2020-08-13 20:07:03 +00:00
|
|
|
pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
|
2019-10-22 18:54:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// WritePacket writes a packet to the given destination address and protocol.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
|
2020-08-13 20:07:03 +00:00
|
|
|
e.addIPHeader(r, pkt, params)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-03-20 19:00:21 +00:00
|
|
|
// iptables filtering. All packets that reach here are locally
|
|
|
|
// generated.
|
2020-08-11 02:32:48 +00:00
|
|
|
nicName := e.stack.FindNICNameFromID(e.NICID())
|
2020-03-20 19:00:21 +00:00
|
|
|
ipt := e.stack.IPTables()
|
2020-06-03 21:57:57 +00:00
|
|
|
if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
|
2020-03-20 19:00:21 +00:00
|
|
|
// iptables is telling us to drop the packet.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-09-18 04:52:54 +00:00
|
|
|
// If the packet is manipulated as per NAT Output rules, handle packet
|
|
|
|
// based on destination address and do not send the packet to link
|
|
|
|
// layer.
|
|
|
|
//
|
|
|
|
// TODO(gvisor.dev/issue/170): We should do this for every
|
|
|
|
// packet, rather than only NATted packets, but removing this check
|
|
|
|
// short circuits broadcasts before they are sent out to other hosts.
|
2020-03-27 19:18:45 +00:00
|
|
|
if pkt.NatDone {
|
2020-08-13 20:07:03 +00:00
|
|
|
netHeader := header.IPv4(pkt.NetworkHeader().View())
|
2020-03-27 19:18:45 +00:00
|
|
|
ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress())
|
|
|
|
if err == nil {
|
2020-06-04 02:57:39 +00:00
|
|
|
route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
|
2020-06-07 20:37:25 +00:00
|
|
|
ep.HandlePacket(&route, pkt)
|
2020-03-27 19:18:45 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketLoop != 0 {
|
2019-06-05 23:07:18 +00:00
|
|
|
loopedR := r.MakeLoopedRoute()
|
2020-06-07 20:37:25 +00:00
|
|
|
e.HandlePacket(&loopedR, pkt)
|
2019-06-05 23:07:18 +00:00
|
|
|
loopedR.Release()
|
2019-03-08 23:48:16 +00:00
|
|
|
}
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketOut == 0 {
|
2019-03-08 23:48:16 +00:00
|
|
|
return nil
|
|
|
|
}
|
2020-08-13 20:07:03 +00:00
|
|
|
if pkt.Size() > int(e.linkEP.MTU()) && (gso == nil || gso.Type == stack.GSONone) {
|
2019-11-14 18:14:07 +00:00
|
|
|
return e.writePacketFragments(r, gso, int(e.linkEP.MTU()), pkt)
|
2019-05-03 20:29:20 +00:00
|
|
|
}
|
2019-11-14 18:14:07 +00:00
|
|
|
if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
|
2019-05-03 20:29:20 +00:00
|
|
|
return err
|
|
|
|
}
|
2019-03-08 23:48:16 +00:00
|
|
|
r.Stats().IP.PacketsSent.Increment()
|
2019-05-03 20:29:20 +00:00
|
|
|
return nil
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2019-10-22 18:54:14 +00:00
|
|
|
// WritePackets implements stack.NetworkEndpoint.WritePackets.
|
2020-04-04 01:34:48 +00:00
|
|
|
func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) {
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketLoop != 0 {
|
2019-10-22 18:54:14 +00:00
|
|
|
panic("multiple packets in local loop")
|
|
|
|
}
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketOut == 0 {
|
2020-04-04 01:34:48 +00:00
|
|
|
return pkts.Len(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for pkt := pkts.Front(); pkt != nil; {
|
2020-08-13 20:07:03 +00:00
|
|
|
e.addIPHeader(r, pkt, params)
|
2020-04-04 01:34:48 +00:00
|
|
|
pkt = pkt.Next()
|
2019-10-22 18:54:14 +00:00
|
|
|
}
|
|
|
|
|
2020-05-08 22:39:04 +00:00
|
|
|
nicName := e.stack.FindNICNameFromID(e.NICID())
|
2020-03-20 19:00:21 +00:00
|
|
|
// iptables filtering. All packets that reach here are locally
|
|
|
|
// generated.
|
|
|
|
ipt := e.stack.IPTables()
|
2020-05-08 22:39:04 +00:00
|
|
|
dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
|
2020-03-27 19:18:45 +00:00
|
|
|
if len(dropped) == 0 && len(natPkts) == 0 {
|
2020-04-04 01:34:48 +00:00
|
|
|
// Fast path: If no packets are to be dropped then we can just invoke the
|
|
|
|
// faster WritePackets API directly.
|
|
|
|
n, err := e.linkEP.WritePackets(r, gso, pkts, ProtocolNumber)
|
|
|
|
r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
2020-09-18 04:52:54 +00:00
|
|
|
// Slow path as we are dropping some packets in the batch degrade to
|
2020-04-04 01:34:48 +00:00
|
|
|
// emitting one packet at a time.
|
|
|
|
n := 0
|
|
|
|
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
|
|
|
|
if _, ok := dropped[pkt]; ok {
|
2020-03-20 19:00:21 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-03-27 19:18:45 +00:00
|
|
|
if _, ok := natPkts[pkt]; ok {
|
2020-08-13 20:07:03 +00:00
|
|
|
netHeader := header.IPv4(pkt.NetworkHeader().View())
|
2020-06-07 20:37:25 +00:00
|
|
|
if ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()); err == nil {
|
2020-03-27 19:18:45 +00:00
|
|
|
src := netHeader.SourceAddress()
|
|
|
|
dst := netHeader.DestinationAddress()
|
|
|
|
route := r.ReverseRoute(src, dst)
|
2020-06-07 20:37:25 +00:00
|
|
|
ep.HandlePacket(&route, pkt)
|
2020-03-27 19:18:45 +00:00
|
|
|
n++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
2020-06-03 21:57:57 +00:00
|
|
|
if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
|
2020-04-04 01:34:48 +00:00
|
|
|
r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
n++
|
2019-10-22 18:54:14 +00:00
|
|
|
}
|
|
|
|
r.Stats().IP.PacketsSent.IncrementBy(uint64(n))
|
2020-04-04 01:34:48 +00:00
|
|
|
return n, nil
|
2019-10-22 18:54:14 +00:00
|
|
|
}
|
|
|
|
|
2019-07-13 01:08:03 +00:00
|
|
|
// WriteHeaderIncludedPacket writes a packet already containing a network
|
|
|
|
// header through the given route.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
|
2019-07-13 01:08:03 +00:00
|
|
|
// The packet already has an IP header, but there are a few required
|
|
|
|
// checks.
|
2020-05-01 23:08:26 +00:00
|
|
|
h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
|
|
|
|
if !ok {
|
|
|
|
return tcpip.ErrInvalidOptionValue
|
|
|
|
}
|
|
|
|
ip := header.IPv4(h)
|
2019-11-14 18:14:07 +00:00
|
|
|
if !ip.IsValid(pkt.Data.Size()) {
|
2019-07-13 01:08:03 +00:00
|
|
|
return tcpip.ErrInvalidOptionValue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Always set the total length.
|
2019-11-14 18:14:07 +00:00
|
|
|
ip.SetTotalLength(uint16(pkt.Data.Size()))
|
2019-07-13 01:08:03 +00:00
|
|
|
|
|
|
|
// Set the source address when zero.
|
|
|
|
if ip.SourceAddress() == tcpip.Address(([]byte{0, 0, 0, 0})) {
|
|
|
|
ip.SetSourceAddress(r.LocalAddress)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the destination. If the packet already included a destination,
|
|
|
|
// it will be part of the route.
|
|
|
|
ip.SetDestinationAddress(r.RemoteAddress)
|
|
|
|
|
|
|
|
// Set the packet ID when zero.
|
|
|
|
if ip.ID() == 0 {
|
2020-07-07 23:13:21 +00:00
|
|
|
// RFC 6864 section 4.3 mandates uniqueness of ID values for
|
|
|
|
// non-atomic datagrams, so assign an ID to all such datagrams
|
|
|
|
// according to the definition given in RFC 6864 section 4.
|
|
|
|
if ip.Flags()&header.IPv4FlagDontFragment == 0 || ip.Flags()&header.IPv4FlagMoreFragments != 0 || ip.FragmentOffset() > 0 {
|
|
|
|
ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)))
|
2019-07-13 01:08:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Always set the checksum.
|
|
|
|
ip.SetChecksum(0)
|
|
|
|
ip.SetChecksum(^ip.CalculateChecksum())
|
|
|
|
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketLoop != 0 {
|
2019-11-14 18:14:07 +00:00
|
|
|
e.HandlePacket(r, pkt.Clone())
|
2019-07-13 01:08:03 +00:00
|
|
|
}
|
2020-01-08 18:10:57 +00:00
|
|
|
if r.Loop&stack.PacketOut == 0 {
|
2019-07-13 01:08:03 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
r.Stats().IP.PacketsSent.Increment()
|
2019-11-14 18:14:07 +00:00
|
|
|
|
|
|
|
return e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
|
2019-07-13 01:08:03 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// HandlePacket is called by the link layer when new ipv4 packets arrive for
|
|
|
|
// this endpoint.
|
2020-06-03 21:57:57 +00:00
|
|
|
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
|
2020-08-13 20:07:03 +00:00
|
|
|
h := header.IPv4(pkt.NetworkHeader().View())
|
|
|
|
if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
|
2020-05-01 23:08:26 +00:00
|
|
|
r.Stats().IP.MalformedPacketsReceived.Increment()
|
|
|
|
return
|
|
|
|
}
|
2020-01-21 22:47:17 +00:00
|
|
|
|
2020-01-11 02:07:15 +00:00
|
|
|
// iptables filtering. All packets that reach here are intended for
|
|
|
|
// this machine and will not be forwarded.
|
2020-01-08 23:57:25 +00:00
|
|
|
ipt := e.stack.IPTables()
|
2020-06-03 21:57:57 +00:00
|
|
|
if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
|
2020-01-08 22:48:47 +00:00
|
|
|
// iptables is telling us to drop the packet.
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-06-07 20:37:25 +00:00
|
|
|
if h.More() || h.FragmentOffset() != 0 {
|
2020-08-13 20:07:03 +00:00
|
|
|
if pkt.Data.Size()+pkt.TransportHeader().View().Size() == 0 {
|
2019-10-10 22:13:39 +00:00
|
|
|
// Drop the packet as it's marked as a fragment but has
|
|
|
|
// no payload.
|
|
|
|
r.Stats().IP.MalformedPacketsReceived.Increment()
|
|
|
|
r.Stats().IP.MalformedFragmentsReceived.Increment()
|
|
|
|
return
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
// The packet is a fragment, let's try to reassemble it.
|
2020-09-13 06:19:34 +00:00
|
|
|
start := h.FragmentOffset()
|
|
|
|
// Drop the fragment if the size of the reassembled payload would exceed the
|
|
|
|
// maximum payload size.
|
|
|
|
//
|
|
|
|
// Note that this addition doesn't overflow even on 32bit architecture
|
|
|
|
// because pkt.Data.Size() should not exceed 65535 (the max IP datagram
|
|
|
|
// size). Otherwise the packet would've been rejected as invalid before
|
|
|
|
// reaching here.
|
|
|
|
if int(start)+pkt.Data.Size() > header.IPv4MaximumPayloadSize {
|
2019-10-10 22:13:39 +00:00
|
|
|
r.Stats().IP.MalformedPacketsReceived.Increment()
|
|
|
|
r.Stats().IP.MalformedFragmentsReceived.Increment()
|
|
|
|
return
|
|
|
|
}
|
2018-09-13 04:57:04 +00:00
|
|
|
var ready bool
|
2019-10-16 00:03:13 +00:00
|
|
|
var err error
|
2020-08-20 19:04:36 +00:00
|
|
|
proto := h.Protocol()
|
|
|
|
pkt.Data, _, ready, err = e.protocol.fragmentation.Process(
|
2020-08-04 23:25:15 +00:00
|
|
|
// As per RFC 791 section 2.3, the identification value is unique
|
|
|
|
// for a source-destination pair and protocol.
|
2020-07-31 21:18:18 +00:00
|
|
|
fragmentation.FragmentID{
|
|
|
|
Source: h.SourceAddress(),
|
|
|
|
Destination: h.DestinationAddress(),
|
|
|
|
ID: uint32(h.ID()),
|
2020-08-20 19:04:36 +00:00
|
|
|
Protocol: proto,
|
2020-07-31 21:18:18 +00:00
|
|
|
},
|
2020-09-13 06:19:34 +00:00
|
|
|
start,
|
|
|
|
start+uint16(pkt.Data.Size())-1,
|
2020-07-31 21:18:18 +00:00
|
|
|
h.More(),
|
2020-08-20 19:04:36 +00:00
|
|
|
proto,
|
2020-07-31 21:18:18 +00:00
|
|
|
pkt.Data,
|
|
|
|
)
|
2019-10-16 00:03:13 +00:00
|
|
|
if err != nil {
|
|
|
|
r.Stats().IP.MalformedPacketsReceived.Increment()
|
|
|
|
r.Stats().IP.MalformedFragmentsReceived.Increment()
|
|
|
|
return
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
if !ready {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p := h.TransportProtocol()
|
|
|
|
if p == header.ICMPv4ProtocolNumber {
|
2019-11-06 22:24:38 +00:00
|
|
|
e.handleICMP(r, pkt)
|
2018-04-27 17:37:02 +00:00
|
|
|
return
|
|
|
|
}
|
2018-08-27 22:28:38 +00:00
|
|
|
r.Stats().IP.PacketsDelivered.Increment()
|
2019-11-06 22:24:38 +00:00
|
|
|
e.dispatcher.DeliverTransportPacket(r, p, pkt)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close cleans up resources associated with the endpoint.
|
2019-03-26 18:43:55 +00:00
|
|
|
func (e *endpoint) Close() {}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2019-09-25 19:56:00 +00:00
|
|
|
type protocol struct {
|
|
|
|
ids []uint32
|
|
|
|
hashIV uint32
|
2019-10-08 02:28:26 +00:00
|
|
|
|
|
|
|
// defaultTTL is the current default TTL for the protocol. Only the
|
|
|
|
// uint8 portion of it is meaningful and it must be accessed
|
|
|
|
// atomically.
|
|
|
|
defaultTTL uint32
|
2020-08-04 23:25:15 +00:00
|
|
|
|
|
|
|
fragmentation *fragmentation.Fragmentation
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Number returns the ipv4 protocol number.
|
|
|
|
func (p *protocol) Number() tcpip.NetworkProtocolNumber {
|
|
|
|
return ProtocolNumber
|
|
|
|
}
|
|
|
|
|
|
|
|
// MinimumPacketSize returns the minimum valid ipv4 packet size.
|
|
|
|
func (p *protocol) MinimumPacketSize() int {
|
|
|
|
return header.IPv4MinimumSize
|
|
|
|
}
|
|
|
|
|
2019-07-24 20:40:52 +00:00
|
|
|
// DefaultPrefixLen returns the IPv4 default prefix length.
|
|
|
|
func (p *protocol) DefaultPrefixLen() int {
|
|
|
|
return header.IPv4AddressSize * 8
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// ParseAddresses implements NetworkProtocol.ParseAddresses.
|
|
|
|
func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
|
|
|
|
h := header.IPv4(v)
|
|
|
|
return h.SourceAddress(), h.DestinationAddress()
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetOption implements NetworkProtocol.SetOption.
|
2020-08-28 18:47:58 +00:00
|
|
|
func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error {
|
2019-10-08 02:28:26 +00:00
|
|
|
switch v := option.(type) {
|
2020-08-28 18:47:58 +00:00
|
|
|
case *tcpip.DefaultTTLOption:
|
|
|
|
p.SetDefaultTTL(uint8(*v))
|
2019-10-08 02:28:26 +00:00
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return tcpip.ErrUnknownProtocolOption
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Option implements NetworkProtocol.Option.
|
2020-08-28 18:47:58 +00:00
|
|
|
func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error {
|
2019-10-08 02:28:26 +00:00
|
|
|
switch v := option.(type) {
|
|
|
|
case *tcpip.DefaultTTLOption:
|
|
|
|
*v = tcpip.DefaultTTLOption(p.DefaultTTL())
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return tcpip.ErrUnknownProtocolOption
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetDefaultTTL sets the default TTL for endpoints created with this protocol.
|
|
|
|
func (p *protocol) SetDefaultTTL(ttl uint8) {
|
|
|
|
atomic.StoreUint32(&p.defaultTTL, uint32(ttl))
|
|
|
|
}
|
|
|
|
|
|
|
|
// DefaultTTL returns the default TTL for endpoints created with this protocol.
|
|
|
|
func (p *protocol) DefaultTTL() uint8 {
|
|
|
|
return uint8(atomic.LoadUint32(&p.defaultTTL))
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2020-02-24 18:31:01 +00:00
|
|
|
// Close implements stack.TransportProtocol.Close.
|
|
|
|
func (*protocol) Close() {}
|
|
|
|
|
|
|
|
// Wait implements stack.TransportProtocol.Wait.
|
|
|
|
func (*protocol) Wait() {}
|
|
|
|
|
2020-09-18 07:46:26 +00:00
|
|
|
// Parse implements stack.NetworkProtocol.Parse.
|
2020-06-07 20:37:25 +00:00
|
|
|
func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
|
2020-09-18 07:46:26 +00:00
|
|
|
if ok := parse.IPv4(pkt); !ok {
|
2020-08-13 20:07:03 +00:00
|
|
|
return 0, false, false
|
2020-06-07 20:37:25 +00:00
|
|
|
}
|
|
|
|
|
2020-09-18 07:46:26 +00:00
|
|
|
ipHdr := header.IPv4(pkt.NetworkHeader().View())
|
|
|
|
return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true
|
2020-06-07 20:37:25 +00:00
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// calculateMTU calculates the network-layer payload MTU based on the link-layer
|
|
|
|
// payload mtu.
|
|
|
|
func calculateMTU(mtu uint32) uint32 {
|
2019-02-26 22:57:27 +00:00
|
|
|
if mtu > MaxTotalSize {
|
|
|
|
mtu = MaxTotalSize
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
return mtu - header.IPv4MinimumSize
|
|
|
|
}
|
|
|
|
|
|
|
|
// hashRoute calculates a hash value for the given route. It uses the source &
|
|
|
|
// destination address, the transport protocol number, and a random initial
|
|
|
|
// value (generated once on initialization) to generate the hash.
|
2019-09-25 19:56:00 +00:00
|
|
|
func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 {
|
2018-04-27 17:37:02 +00:00
|
|
|
t := r.LocalAddress
|
|
|
|
a := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24
|
|
|
|
t = r.RemoteAddress
|
|
|
|
b := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24
|
|
|
|
return hash.Hash3Words(a, b, uint32(protocol), hashIV)
|
|
|
|
}
|
|
|
|
|
2019-09-25 19:56:00 +00:00
|
|
|
// NewProtocol returns an IPv4 network protocol.
|
|
|
|
func NewProtocol() stack.NetworkProtocol {
|
|
|
|
ids := make([]uint32, buckets)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Randomly initialize hashIV and the ids.
|
|
|
|
r := hash.RandN32(1 + buckets)
|
|
|
|
for i := range ids {
|
|
|
|
ids[i] = r[i]
|
|
|
|
}
|
2019-09-25 19:56:00 +00:00
|
|
|
hashIV := r[buckets]
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-08-04 23:25:15 +00:00
|
|
|
return &protocol{
|
|
|
|
ids: ids,
|
|
|
|
hashIV: hashIV,
|
|
|
|
defaultTTL: DefaultTTL,
|
|
|
|
fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|