Merge release-20190806.1-295-g12235d5 (automated)
This commit is contained in:
commit
975132cced
|
@ -256,6 +256,17 @@ type SockAddrInet6 struct {
|
|||
Scope_id uint32
|
||||
}
|
||||
|
||||
// SockAddrLink mirrors struct sockaddr_ll from uapi/linux/if_packet.h, the
// address format used with AF_PACKET sockets. Field order and widths follow
// the kernel definition, so the struct can be (un)marshalled directly.
type SockAddrLink struct {
	// Family is the address family and Protocol the link layer protocol
	// (sll_family and sll_protocol in Linux).
	Family, Protocol uint16

	// InterfaceIndex identifies the network interface (sll_ifindex).
	InterfaceIndex int32

	// ARPHardwareType is the ARP hardware type (sll_hatype).
	ARPHardwareType uint16

	// PacketType is the packet type (sll_pkttype) and HardwareAddrLen the
	// number of meaningful bytes in HardwareAddr (sll_halen).
	PacketType, HardwareAddrLen byte

	// HardwareAddr is the link layer address (sll_addr). Only the first
	// HardwareAddrLen bytes are significant.
	HardwareAddr [8]byte
}
|
||||
|
||||
// UnixPathMax is the maximum length of the path in an AF_UNIX socket.
|
||||
//
|
||||
// From uapi/linux/un.h.
|
||||
|
@ -278,6 +289,7 @@ type SockAddr interface {
|
|||
|
||||
func (s *SockAddrInet) implementsSockAddr() {}
|
||||
func (s *SockAddrInet6) implementsSockAddr() {}
|
||||
func (s *SockAddrLink) implementsSockAddr() {}
|
||||
func (s *SockAddrUnix) implementsSockAddr() {}
|
||||
func (s *SockAddrNetlink) implementsSockAddr() {}
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/syserror"
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
|
||||
|
@ -296,6 +297,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue
|
|||
|
||||
var sockAddrInetSize = int(binary.Size(linux.SockAddrInet{}))
|
||||
var sockAddrInet6Size = int(binary.Size(linux.SockAddrInet6{}))
|
||||
var sockAddrLinkSize = int(binary.Size(linux.SockAddrLink{}))
|
||||
|
||||
// bytesToIPAddress converts an IPv4 or IPv6 address from the user to the
|
||||
// netstack representation taking any addresses into account.
|
||||
|
@ -307,12 +309,12 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
|
|||
}
|
||||
|
||||
// AddressAndFamily reads an sockaddr struct from the given address and
|
||||
// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
|
||||
// AF_INET6 addresses.
|
||||
// converts it to the FullAddress format. It supports AF_UNIX, AF_INET,
|
||||
// AF_INET6, and AF_PACKET addresses.
|
||||
//
|
||||
// strict indicates whether addresses with the AF_UNSPEC family are accepted or not.
|
||||
//
|
||||
// AddressAndFamily returns an address, its family.
|
||||
// AddressAndFamily returns an address and its family.
|
||||
func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
|
||||
// Make sure we have at least 2 bytes for the address family.
|
||||
if len(addr) < 2 {
|
||||
|
@ -371,6 +373,22 @@ func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress,
|
|||
}
|
||||
return out, family, nil
|
||||
|
||||
case linux.AF_PACKET:
|
||||
var a linux.SockAddrLink
|
||||
if len(addr) < sockAddrLinkSize {
|
||||
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
||||
}
|
||||
binary.Unmarshal(addr[:sockAddrLinkSize], usermem.ByteOrder, &a)
|
||||
if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize {
|
||||
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
||||
}
|
||||
|
||||
// TODO(b/129292371): Return protocol too.
|
||||
return tcpip.FullAddress{
|
||||
NIC: tcpip.NICID(a.InterfaceIndex),
|
||||
Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]),
|
||||
}, family, nil
|
||||
|
||||
case linux.AF_UNSPEC:
|
||||
return tcpip.FullAddress{}, family, nil
|
||||
|
||||
|
@ -1951,12 +1969,14 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32)
|
|||
return &out, uint32(2 + l)
|
||||
}
|
||||
return &out, uint32(3 + l)
|
||||
|
||||
case linux.AF_INET:
|
||||
var out linux.SockAddrInet
|
||||
copy(out.Addr[:], addr.Addr)
|
||||
out.Family = linux.AF_INET
|
||||
out.Port = htons(addr.Port)
|
||||
return &out, uint32(binary.Size(out))
|
||||
return &out, uint32(sockAddrInetSize)
|
||||
|
||||
case linux.AF_INET6:
|
||||
var out linux.SockAddrInet6
|
||||
if len(addr.Addr) == 4 {
|
||||
|
@ -1972,7 +1992,17 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32)
|
|||
if isLinkLocal(addr.Addr) {
|
||||
out.Scope_id = uint32(addr.NIC)
|
||||
}
|
||||
return &out, uint32(binary.Size(out))
|
||||
return &out, uint32(sockAddrInet6Size)
|
||||
|
||||
case linux.AF_PACKET:
|
||||
// TODO(b/129292371): Return protocol too.
|
||||
var out linux.SockAddrLink
|
||||
out.Family = linux.AF_PACKET
|
||||
out.InterfaceIndex = int32(addr.NIC)
|
||||
out.HardwareAddrLen = header.EthernetAddressSize
|
||||
copy(out.HardwareAddr[:], addr.Addr)
|
||||
return &out, uint32(sockAddrLinkSize)
|
||||
|
||||
default:
|
||||
return nil, 0
|
||||
}
|
||||
|
|
|
@ -62,6 +62,10 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
|
|||
}
|
||||
|
||||
case linux.SOCK_RAW:
|
||||
// TODO(b/142504697): "In order to create a raw socket, a
|
||||
// process must have the CAP_NET_RAW capability in the user
|
||||
// namespace that governs its network namespace." - raw(7)
|
||||
|
||||
// Raw sockets require CAP_NET_RAW.
|
||||
creds := auth.CredentialsFromContext(ctx)
|
||||
if !creds.HasCapability(linux.CAP_NET_RAW) {
|
||||
|
@ -85,7 +89,8 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
|
|||
return 0, true, syserr.ErrProtocolNotSupported
|
||||
}
|
||||
|
||||
// Socket creates a new socket object for the AF_INET or AF_INET6 family.
|
||||
// Socket creates a new socket object for the AF_INET, AF_INET6, or AF_PACKET
|
||||
// family.
|
||||
func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
|
||||
// Fail right away if we don't have a stack.
|
||||
stack := t.NetworkContext()
|
||||
|
@ -99,6 +104,12 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// Packet sockets are handled separately, since they are neither INET
|
||||
// nor INET6 specific.
|
||||
if p.family == linux.AF_PACKET {
|
||||
return packetSocket(t, eps, stype, protocol)
|
||||
}
|
||||
|
||||
// Figure out the transport protocol.
|
||||
transProto, associated, err := getTransportProtocol(t, stype, protocol)
|
||||
if err != nil {
|
||||
|
@ -121,12 +132,47 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
|
|||
return New(t, p.family, stype, int(transProto), wq, ep)
|
||||
}
|
||||
|
||||
func packetSocket(t *kernel.Task, epStack *Stack, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
|
||||
// TODO(b/142504697): "In order to create a packet socket, a process
|
||||
// must have the CAP_NET_RAW capability in the user namespace that
|
||||
// governs its network namespace." - packet(7)
|
||||
|
||||
// Packet sockets require CAP_NET_RAW.
|
||||
creds := auth.CredentialsFromContext(t)
|
||||
if !creds.HasCapability(linux.CAP_NET_RAW) {
|
||||
return nil, syserr.ErrNotPermitted
|
||||
}
|
||||
|
||||
// "cooked" packets don't contain link layer information.
|
||||
var cooked bool
|
||||
switch stype {
|
||||
case linux.SOCK_DGRAM:
|
||||
cooked = true
|
||||
case linux.SOCK_RAW:
|
||||
cooked = false
|
||||
default:
|
||||
return nil, syserr.ErrProtocolNotSupported
|
||||
}
|
||||
|
||||
// protocol is passed in network byte order, but netstack wants it in
|
||||
// host order.
|
||||
netProto := tcpip.NetworkProtocolNumber(ntohs(uint16(protocol)))
|
||||
|
||||
wq := &waiter.Queue{}
|
||||
ep, err := epStack.Stack.NewPacketEndpoint(cooked, netProto, wq)
|
||||
if err != nil {
|
||||
return nil, syserr.TranslateNetstackError(err)
|
||||
}
|
||||
|
||||
return New(t, linux.AF_PACKET, stype, protocol, wq, ep)
|
||||
}
|
||||
|
||||
// Pair is not supported by this provider. It ignores all of its arguments
// and returns nil for both files together with a nil error.
|
||||
func (*provider) Pair(*kernel.Task, linux.SockType, int) (*fs.File, *fs.File, *syserr.Error) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
// init registers socket providers for AF_INET and AF_INET6.
|
||||
// init registers socket providers for AF_INET, AF_INET6, and AF_PACKET.
|
||||
func init() {
|
||||
// Providers backed by netstack.
|
||||
p := []provider{
|
||||
|
@ -138,6 +184,9 @@ func init() {
|
|||
family: linux.AF_INET6,
|
||||
netProto: ipv6.ProtocolNumber,
|
||||
},
|
||||
{
|
||||
family: linux.AF_PACKET,
|
||||
},
|
||||
}
|
||||
|
||||
for i := range p {
|
||||
|
|
|
@ -50,6 +50,24 @@ const (
|
|||
EthernetAddressSize = 6
|
||||
)
|
||||
|
||||
const (
|
||||
// EthernetProtocolAll is a catch-all for all protocols carried inside
|
||||
// an ethernet frame. It is mainly used to create packet sockets that
|
||||
// capture all traffic.
|
||||
EthernetProtocolAll tcpip.NetworkProtocolNumber = 0x0003
|
||||
|
||||
// EthernetProtocolPUP is the PARC Universal Packet protocol ethertype.
|
||||
EthernetProtocolPUP tcpip.NetworkProtocolNumber = 0x0200
|
||||
)
|
||||
|
||||
// Ethertypes holds the protocol numbers describing the payload of an ethernet
|
||||
// frame. These types aren't necessarily supported by netstack, but can be used
|
||||
// to catch all traffic of a type via packet endpoints.
|
||||
var Ethertypes = []tcpip.NetworkProtocolNumber{
|
||||
EthernetProtocolAll,
|
||||
EthernetProtocolPUP,
|
||||
}
|
||||
|
||||
// SourceAddress returns the "MAC source" field of the ethernet frame header.
|
||||
func (b Ethernet) SourceAddress() tcpip.LinkAddress {
|
||||
return tcpip.LinkAddress(b[srcMAC:][:EthernetAddressSize])
|
||||
|
|
|
@ -72,7 +72,7 @@ func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.Vector
|
|||
|
||||
// InjectLinkAddr injects an inbound packet with a remote link address.
|
||||
func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, vv buffer.VectorisedView) {
|
||||
e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil))
|
||||
e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil), nil /* linkHeader */)
|
||||
}
|
||||
|
||||
// Attach saves the stack network-layer dispatcher for use later when packets
|
||||
|
@ -134,5 +134,22 @@ func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, hdr buffer.Prepen
|
|||
return nil
|
||||
}
|
||||
|
||||
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
|
||||
func (e *Endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
|
||||
p := PacketInfo{
|
||||
Header: packet.ToView(),
|
||||
Proto: 0,
|
||||
Payload: buffer.View{},
|
||||
GSO: nil,
|
||||
}
|
||||
|
||||
select {
|
||||
case e.C <- p:
|
||||
default:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait implements stack.LinkEndpoint.Wait.
|
||||
func (*Endpoint) Wait() {}
|
||||
|
|
|
@ -430,8 +430,13 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
|
|||
return rawfile.NonBlockingWrite3(e.fds[0], hdr.View(), payload.ToView(), nil)
|
||||
}
|
||||
|
||||
// WriteRawPacket writes a raw packet directly to the file descriptor.
|
||||
func (e *endpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error {
|
||||
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
|
||||
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
|
||||
return rawfile.NonBlockingWrite(e.fds[0], packet.ToView())
|
||||
}
|
||||
|
||||
// InjectOutbound implements stack.InjectableLinkEndpoint.InjectOutbound.
//
// packet is written unchanged to the first file descriptor of the endpoint
// with a non-blocking write. dest is not used by this endpoint.
|
||||
func (e *endpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error {
|
||||
return rawfile.NonBlockingWrite(e.fds[0], packet)
|
||||
}
|
||||
|
||||
|
@ -468,9 +473,9 @@ func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
|
|||
e.dispatcher = dispatcher
|
||||
}
|
||||
|
||||
// Inject injects an inbound packet.
|
||||
func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
|
||||
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv)
|
||||
// InjectInbound injects an inbound packet.
|
||||
func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
|
||||
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
|
||||
}
|
||||
|
||||
// NewInjectable creates a new fd-based InjectableEndpoint.
|
||||
|
|
|
@ -169,9 +169,10 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
var (
|
||||
p tcpip.NetworkProtocolNumber
|
||||
remote, local tcpip.LinkAddress
|
||||
eth header.Ethernet
|
||||
)
|
||||
if d.e.hdrSize > 0 {
|
||||
eth := header.Ethernet(pkt)
|
||||
eth = header.Ethernet(pkt)
|
||||
p = eth.Type()
|
||||
remote = eth.SourceAddress()
|
||||
local = eth.DestinationAddress()
|
||||
|
@ -189,6 +190,6 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
}
|
||||
|
||||
pkt = pkt[d.e.hdrSize:]
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}))
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}), buffer.View(eth))
|
||||
return true, nil
|
||||
}
|
||||
|
|
|
@ -118,9 +118,10 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
var (
|
||||
p tcpip.NetworkProtocolNumber
|
||||
remote, local tcpip.LinkAddress
|
||||
eth header.Ethernet
|
||||
)
|
||||
if d.e.hdrSize > 0 {
|
||||
eth := header.Ethernet(d.views[0])
|
||||
eth = header.Ethernet(d.views[0][:header.EthernetMinimumSize])
|
||||
p = eth.Type()
|
||||
remote = eth.SourceAddress()
|
||||
local = eth.DestinationAddress()
|
||||
|
@ -141,7 +142,7 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
vv := buffer.NewVectorisedView(n, d.views[:used])
|
||||
vv.TrimFront(d.e.hdrSize)
|
||||
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
|
||||
|
||||
// Prepare e.views for another packet: release used views.
|
||||
for i := 0; i < used; i++ {
|
||||
|
@ -271,9 +272,10 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
var (
|
||||
p tcpip.NetworkProtocolNumber
|
||||
remote, local tcpip.LinkAddress
|
||||
eth header.Ethernet
|
||||
)
|
||||
if d.e.hdrSize > 0 {
|
||||
eth := header.Ethernet(d.views[k][0])
|
||||
eth = header.Ethernet(d.views[k][0])
|
||||
p = eth.Type()
|
||||
remote = eth.SourceAddress()
|
||||
local = eth.DestinationAddress()
|
||||
|
@ -293,7 +295,7 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
|
|||
used := d.capViews(k, int(n), BufConfig)
|
||||
vv := buffer.NewVectorisedView(int(n), d.views[k][:used])
|
||||
vv.TrimFront(d.e.hdrSize)
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
|
||||
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
|
||||
|
||||
// Prepare e.views for another packet: release used views.
|
||||
for i := 0; i < used; i++ {
|
||||
|
|
|
@ -23,6 +23,7 @@ package loopback
|
|||
import (
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
)
|
||||
|
||||
|
@ -70,6 +71,9 @@ func (*endpoint) LinkAddress() tcpip.LinkAddress {
|
|||
return ""
|
||||
}
|
||||
|
||||
// Wait implements stack.LinkEndpoint.Wait.
|
||||
func (*endpoint) Wait() {}
|
||||
|
||||
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
|
||||
// packets to the network-layer dispatcher.
|
||||
func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
|
||||
|
@ -81,10 +85,22 @@ func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependa
|
|||
// Because we're immediately turning around and writing the packet back to the
|
||||
// rx path, we intentionally don't preserve the remote and local link
|
||||
// addresses from the stack.Route we're passed.
|
||||
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv)
|
||||
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait implements stack.LinkEndpoint.Wait.
|
||||
func (*endpoint) Wait() {}
|
||||
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
|
||||
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
|
||||
// Reject the packet if it's shorter than an ethernet header.
|
||||
if packet.Size() < header.EthernetMinimumSize {
|
||||
return tcpip.ErrBadAddress
|
||||
}
|
||||
|
||||
// There should be an ethernet header at the beginning of packet.
|
||||
linkHeader := header.Ethernet(packet.First()[:header.EthernetMinimumSize])
|
||||
packet.TrimFront(len(linkHeader))
|
||||
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), packet, buffer.View(linkHeader))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -116,7 +116,7 @@ func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack
|
|||
// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
|
||||
// called by the link-layer endpoint being wrapped when a packet arrives, and
|
||||
// logs the packet before forwarding to the actual dispatcher.
|
||||
func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
|
||||
func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
|
||||
if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
|
||||
logPacket("recv", protocol, vv.First(), nil)
|
||||
}
|
||||
|
@ -147,7 +147,7 @@ func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local
|
|||
panic(err)
|
||||
}
|
||||
}
|
||||
e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv)
|
||||
e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader)
|
||||
}
|
||||
|
||||
// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher
|
||||
|
@ -218,8 +218,42 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
|
|||
panic(err)
|
||||
}
|
||||
length -= len(hdrBuf)
|
||||
if length > 0 {
|
||||
for _, v := range payload.Views() {
|
||||
logVectorisedView(payload, length, buf)
|
||||
if _, err := e.file.Write(buf.Bytes()); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
return e.lower.WritePacket(r, gso, hdr, payload, protocol)
|
||||
}
|
||||
|
||||
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
|
||||
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
|
||||
if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
|
||||
logPacket("send", 0, buffer.View("[raw packet, no header available]"), nil /* gso */)
|
||||
}
|
||||
if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 {
|
||||
length := packet.Size()
|
||||
if length > int(e.maxPCAPLen) {
|
||||
length = int(e.maxPCAPLen)
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length))
|
||||
if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(packet.Size()))); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
logVectorisedView(packet, length, buf)
|
||||
if _, err := e.file.Write(buf.Bytes()); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
return e.lower.WriteRawPacket(packet)
|
||||
}
|
||||
|
||||
func logVectorisedView(vv buffer.VectorisedView, length int, buf *bytes.Buffer) {
|
||||
if length <= 0 {
|
||||
return
|
||||
}
|
||||
for _, v := range vv.Views() {
|
||||
if len(v) > length {
|
||||
v = v[:length]
|
||||
}
|
||||
|
@ -229,15 +263,9 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
|
|||
}
|
||||
length -= n
|
||||
if length == 0 {
|
||||
break
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, err := e.file.Write(buf.Bytes()); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
return e.lower.WritePacket(r, gso, hdr, payload, protocol)
|
||||
}
|
||||
|
||||
// Wait implements stack.LinkEndpoint.Wait.
|
||||
|
|
|
@ -40,6 +40,9 @@ type NIC struct {
|
|||
endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
|
||||
addressRanges []tcpip.Subnet
|
||||
mcastJoins map[NetworkEndpointID]int32
|
||||
// packetEPs is protected by mu, but the contained PacketEndpoint
|
||||
// values are not.
|
||||
packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
|
||||
|
||||
stats NICStats
|
||||
|
||||
|
@ -78,7 +81,7 @@ const (
|
|||
)
|
||||
|
||||
func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback bool) *NIC {
|
||||
return &NIC{
|
||||
nic := &NIC{
|
||||
stack: stack,
|
||||
id: id,
|
||||
name: name,
|
||||
|
@ -87,6 +90,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback
|
|||
primary: make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint),
|
||||
endpoints: make(map[NetworkEndpointID]*referencedNetworkEndpoint),
|
||||
mcastJoins: make(map[NetworkEndpointID]int32),
|
||||
packetEPs: make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint),
|
||||
stats: NICStats{
|
||||
Tx: DirectionStats{
|
||||
Packets: &tcpip.StatCounter{},
|
||||
|
@ -101,6 +105,16 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback
|
|||
dad: make(map[tcpip.Address]dadState),
|
||||
},
|
||||
}
|
||||
|
||||
// Register supported packet endpoint protocols.
|
||||
for _, netProto := range header.Ethertypes {
|
||||
nic.packetEPs[netProto] = []PacketEndpoint{}
|
||||
}
|
||||
for _, netProto := range stack.networkProtocols {
|
||||
nic.packetEPs[netProto.Number()] = []PacketEndpoint{}
|
||||
}
|
||||
|
||||
return nic
|
||||
}
|
||||
|
||||
// enable enables the NIC. enable will attach the link to its LinkEndpoint and
|
||||
|
@ -631,7 +645,7 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address,
|
|||
// Note that the ownership of the slice backing vv is retained by the caller.
|
||||
// This rule applies only to the slice itself, not to the items of the slice;
|
||||
// the ownership of the items is not retained by the caller.
|
||||
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
|
||||
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
|
||||
n.stats.Rx.Packets.Increment()
|
||||
n.stats.Rx.Bytes.IncrementBy(uint64(vv.Size()))
|
||||
|
||||
|
@ -641,6 +655,26 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
|
|||
return
|
||||
}
|
||||
|
||||
// If no local link layer address is provided, assume it was sent
|
||||
// directly to this NIC.
|
||||
if local == "" {
|
||||
local = n.linkEP.LinkAddress()
|
||||
}
|
||||
|
||||
// Are any packet sockets listening for this network protocol?
|
||||
n.mu.RLock()
|
||||
packetEPs := n.packetEPs[protocol]
|
||||
// Check whether there are packet sockets listening for every protocol.
|
||||
// If we received a packet with protocol EthernetProtocolAll, then the
|
||||
// previous for loop will have handled it.
|
||||
if protocol != header.EthernetProtocolAll {
|
||||
packetEPs = append(packetEPs, n.packetEPs[header.EthernetProtocolAll]...)
|
||||
}
|
||||
n.mu.RUnlock()
|
||||
for _, ep := range packetEPs {
|
||||
ep.HandlePacket(n.id, local, protocol, vv, linkHeader)
|
||||
}
|
||||
|
||||
if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
|
||||
n.stack.stats.IP.PacketsReceived.Increment()
|
||||
}
|
||||
|
@ -700,7 +734,10 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
|
|||
return
|
||||
}
|
||||
|
||||
// If a packet socket handled the packet, don't treat it as invalid.
|
||||
if len(packetEPs) == 0 {
|
||||
n.stack.stats.IP.InvalidAddressesReceived.Increment()
|
||||
}
|
||||
}
|
||||
|
||||
// DeliverTransportPacket delivers the packets to the appropriate transport
|
||||
|
@ -856,6 +893,36 @@ const (
|
|||
temporary
|
||||
)
|
||||
|
||||
func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
eps, ok := n.packetEPs[netProto]
|
||||
if !ok {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
n.packetEPs[netProto] = append(eps, ep)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
eps, ok := n.packetEPs[netProto]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
for i, epOther := range eps {
|
||||
if epOther == ep {
|
||||
n.packetEPs[netProto] = append(eps[:i], eps[i+1:]...)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type referencedNetworkEndpoint struct {
|
||||
ep NetworkEndpoint
|
||||
nic *NIC
|
||||
|
|
|
@ -71,8 +71,8 @@ type TransportEndpoint interface {
|
|||
|
||||
// RawTransportEndpoint is the interface that needs to be implemented by raw
|
||||
// transport protocol endpoints. RawTransportEndpoints receive the entire
|
||||
// packet - including the link, network, and transport headers - as delivered
|
||||
// to netstack.
|
||||
// packet - including the network and transport headers - as delivered to
|
||||
// netstack.
|
||||
type RawTransportEndpoint interface {
|
||||
// HandlePacket is called by the stack when new packets arrive to
|
||||
// this transport endpoint. The packet contains all data from the link
|
||||
|
@ -80,6 +80,22 @@ type RawTransportEndpoint interface {
|
|||
HandlePacket(r *Route, netHeader buffer.View, packet buffer.VectorisedView)
|
||||
}
|
||||
|
||||
// PacketEndpoint is the interface that needs to be implemented by packet
|
||||
// transport protocol endpoints. These endpoints receive link layer headers in
|
||||
// addition to whatever they contain (usually network and transport layer
|
||||
// headers and a payload).
|
||||
type PacketEndpoint interface {
|
||||
// HandlePacket is called by the stack when new packets arrive that
|
||||
// match the endpoint.
|
||||
//
|
||||
// Implementers should treat packet as immutable and should copy it
|
||||
// before modification.
|
||||
//
|
||||
// linkHeader may have a length of 0, in which case the PacketEndpoint
|
||||
// should construct its own ethernet header for applications.
|
||||
HandlePacket(nicid tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, packet buffer.VectorisedView, linkHeader buffer.View)
|
||||
}
|
||||
|
||||
// TransportProtocol is the interface that needs to be implemented by transport
|
||||
// protocols (e.g., tcp, udp) that want to be part of the networking stack.
|
||||
type TransportProtocol interface {
|
||||
|
@ -242,9 +258,10 @@ type NetworkProtocol interface {
|
|||
// packets to the appropriate network endpoint after it has been handled by
|
||||
// the data link layer.
|
||||
type NetworkDispatcher interface {
|
||||
// DeliverNetworkPacket finds the appropriate network protocol
|
||||
// endpoint and hands the packet over for further processing.
|
||||
DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
|
||||
// DeliverNetworkPacket finds the appropriate network protocol endpoint
|
||||
// and hands the packet over for further processing. linkHeader may have
|
||||
// length 0 when the caller does not have ethernet data.
|
||||
DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View)
|
||||
}
|
||||
|
||||
// LinkEndpointCapabilities is the type associated with the capabilities
|
||||
|
@ -301,6 +318,10 @@ type LinkEndpoint interface {
|
|||
// r.LocalLinkAddress if it is provided.
|
||||
WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error
|
||||
|
||||
// WriteRawPacket writes a packet directly to the link. The packet
|
||||
// should already have an ethernet header.
|
||||
WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error
|
||||
|
||||
// Attach attaches the data link layer endpoint to the network-layer
|
||||
// dispatcher of the stack.
|
||||
Attach(dispatcher NetworkDispatcher)
|
||||
|
@ -324,13 +345,14 @@ type LinkEndpoint interface {
|
|||
type InjectableLinkEndpoint interface {
|
||||
LinkEndpoint
|
||||
|
||||
// Inject injects an inbound packet.
|
||||
Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
|
||||
// InjectInbound injects an inbound packet.
|
||||
InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
|
||||
|
||||
// WriteRawPacket writes a fully formed outbound packet directly to the link.
|
||||
// InjectOutbound writes a fully formed outbound packet directly to the
|
||||
// link.
|
||||
//
|
||||
// dest is used by endpoints with multiple raw destinations.
|
||||
WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error
|
||||
InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error
|
||||
}
|
||||
|
||||
// A LinkAddressResolver is an extension to a NetworkProtocol that
|
||||
|
@ -379,11 +401,16 @@ type LinkAddressCache interface {
|
|||
RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker)
|
||||
}
|
||||
|
||||
// UnassociatedEndpointFactory produces endpoints for writing packets not
|
||||
// associated with a particular transport protocol. Such endpoints can be used
|
||||
// to write arbitrary packets that include the IP header.
|
||||
type UnassociatedEndpointFactory interface {
|
||||
NewUnassociatedRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
|
||||
// RawFactory produces endpoints for writing various types of raw packets.
|
||||
type RawFactory interface {
|
||||
// NewUnassociatedEndpoint produces endpoints for writing packets not
|
||||
// associated with a particular transport protocol. Such endpoints can
|
||||
// be used to write arbitrary packets that include the network header.
|
||||
NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
|
||||
|
||||
// NewPacketEndpoint produces endpoints for reading and writing packets
|
||||
// that include network and (when cooked is false) link layer headers.
|
||||
NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
|
||||
}
|
||||
|
||||
// GSOType is the type of GSO segments.
|
||||
|
|
|
@ -351,10 +351,9 @@ type Stack struct {
|
|||
networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol
|
||||
linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver
|
||||
|
||||
// unassociatedFactory creates unassociated endpoints. If nil, raw
|
||||
// endpoints are disabled. It is set during Stack creation and is
|
||||
// immutable.
|
||||
unassociatedFactory UnassociatedEndpointFactory
|
||||
// rawFactory creates raw endpoints. If nil, raw endpoints are
|
||||
// disabled. It is set during Stack creation and is immutable.
|
||||
rawFactory RawFactory
|
||||
|
||||
demux *transportDemuxer
|
||||
|
||||
|
@ -425,16 +424,16 @@ type Options struct {
|
|||
// stack (false).
|
||||
HandleLocal bool
|
||||
|
||||
// UnassociatedFactory produces unassociated endpoints raw endpoints.
|
||||
// Raw endpoints are enabled only if this is non-nil.
|
||||
UnassociatedFactory UnassociatedEndpointFactory
|
||||
|
||||
// NDPConfigs is the NDP configurations used by interfaces.
|
||||
//
|
||||
// By default, NDPConfigs will have a zero value for its
|
||||
// DupAddrDetectTransmits field, implying that DAD will not be performed
|
||||
// before assigning an address to a NIC.
|
||||
NDPConfigs NDPConfigurations
|
||||
|
||||
// RawFactory produces raw endpoints. Raw endpoints are enabled only if
|
||||
// this is non-nil.
|
||||
RawFactory RawFactory
|
||||
}
|
||||
|
||||
// TransportEndpointInfo holds useful information about a transport endpoint
|
||||
|
@ -514,8 +513,8 @@ func New(opts Options) *Stack {
|
|||
}
|
||||
}
|
||||
|
||||
// Add the factory for unassociated endpoints, if present.
|
||||
s.unassociatedFactory = opts.UnassociatedFactory
|
||||
// Add the factory for raw endpoints, if present.
|
||||
s.rawFactory = opts.RawFactory
|
||||
|
||||
// Create the global transport demuxer.
|
||||
s.demux = newTransportDemuxer(s)
|
||||
|
@ -650,12 +649,12 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp
|
|||
// protocol. Raw endpoints receive all traffic for a given protocol regardless
|
||||
// of address.
|
||||
func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
|
||||
if s.unassociatedFactory == nil {
|
||||
if s.rawFactory == nil {
|
||||
return nil, tcpip.ErrNotPermitted
|
||||
}
|
||||
|
||||
if !associated {
|
||||
return s.unassociatedFactory.NewUnassociatedRawEndpoint(s, network, transport, waiterQueue)
|
||||
return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue)
|
||||
}
|
||||
|
||||
t, ok := s.transportProtocols[transport]
|
||||
|
@ -666,6 +665,16 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network
|
|||
return t.proto.NewRawEndpoint(s, network, waiterQueue)
|
||||
}
|
||||
|
||||
// NewPacketEndpoint creates a new packet endpoint listening for the given
|
||||
// netProto.
|
||||
func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
if s.rawFactory == nil {
|
||||
return nil, tcpip.ErrNotPermitted
|
||||
}
|
||||
|
||||
return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue)
|
||||
}
|
||||
|
||||
// createNIC creates a NIC with the provided id and link-layer endpoint, and
|
||||
// optionally enable it.
|
||||
func (s *Stack) createNIC(id tcpip.NICID, name string, ep LinkEndpoint, enabled, loopback bool) *tcpip.Error {
|
||||
|
@ -1135,6 +1144,109 @@ func (s *Stack) Resume() {
|
|||
}
|
||||
}
|
||||
|
||||
// RegisterPacketEndpoint registers ep with the stack, causing it to receive
|
||||
// all traffic of the specified netProto on the given NIC. If nicID is 0, it
|
||||
// receives traffic from every NIC.
|
||||
func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
// If no NIC is specified, capture on all devices.
|
||||
if nicID == 0 {
|
||||
// Register with each NIC.
|
||||
for _, nic := range s.nics {
|
||||
if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
|
||||
s.unregisterPacketEndpointLocked(0, netProto, ep)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Capture on a specific device.
|
||||
nic, ok := s.nics[nicID]
|
||||
if !ok {
|
||||
return tcpip.ErrUnknownNICID
|
||||
}
|
||||
if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnregisterPacketEndpoint unregisters ep for packets of the specified
|
||||
// netProto from the specified NIC. If nicID is 0, ep is unregistered from all
|
||||
// NICs.
|
||||
func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.unregisterPacketEndpointLocked(nicID, netProto, ep)
|
||||
}
|
||||
|
||||
func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
|
||||
// If no NIC is specified, unregister on all devices.
|
||||
if nicID == 0 {
|
||||
// Unregister with each NIC.
|
||||
for _, nic := range s.nics {
|
||||
nic.unregisterPacketEndpoint(netProto, ep)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Unregister in a single device.
|
||||
nic, ok := s.nics[nicID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
nic.unregisterPacketEndpoint(netProto, ep)
|
||||
}
|
||||
|
||||
// WritePacket writes data directly to the specified NIC. It adds an ethernet
|
||||
// header based on the arguments.
|
||||
func (s *Stack) WritePacket(nicid tcpip.NICID, dst tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) *tcpip.Error {
|
||||
s.mu.Lock()
|
||||
nic, ok := s.nics[nicid]
|
||||
s.mu.Unlock()
|
||||
if !ok {
|
||||
return tcpip.ErrUnknownDevice
|
||||
}
|
||||
|
||||
// Add our own fake ethernet header.
|
||||
ethFields := header.EthernetFields{
|
||||
SrcAddr: nic.linkEP.LinkAddress(),
|
||||
DstAddr: dst,
|
||||
Type: netProto,
|
||||
}
|
||||
fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
|
||||
fakeHeader.Encode(ðFields)
|
||||
ethHeader := buffer.View(fakeHeader).ToVectorisedView()
|
||||
ethHeader.Append(payload)
|
||||
|
||||
if err := nic.linkEP.WriteRawPacket(ethHeader); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteRawPacket writes data directly to the specified NIC without adding any
|
||||
// headers.
|
||||
func (s *Stack) WriteRawPacket(nicid tcpip.NICID, payload buffer.VectorisedView) *tcpip.Error {
|
||||
s.mu.Lock()
|
||||
nic, ok := s.nics[nicid]
|
||||
s.mu.Unlock()
|
||||
if !ok {
|
||||
return tcpip.ErrUnknownDevice
|
||||
}
|
||||
|
||||
if err := nic.linkEP.WriteRawPacket(payload); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NetworkProtocolInstance returns the protocol instance in the stack for the
|
||||
// specified network protocol. This method is public for protocol implementers
|
||||
// and tests to use.
|
||||
|
|
|
@ -465,7 +465,7 @@ func (d *transportDemuxer) findEndpointLocked(eps *transportEndpoints, vv buffer
|
|||
func (d *transportDemuxer) registerRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error {
|
||||
eps, ok := d.protocol[protocolIDs{netProto, transProto}]
|
||||
if !ok {
|
||||
return nil
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
eps.mu.Lock()
|
||||
|
|
|
@ -255,7 +255,7 @@ type FullAddress struct {
|
|||
// This may not be used by all endpoint types.
|
||||
NIC NICID
|
||||
|
||||
// Addr is the network address.
|
||||
// Addr is the network or link layer address.
|
||||
Addr Address
|
||||
|
||||
// Port is the transport port.
|
||||
|
|
|
@ -0,0 +1,363 @@
|
|||
// Copyright 2019 The gVisor Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package packet provides the implementation of packet sockets (see
|
||||
// packet(7)). Packet sockets allow applications to:
|
||||
//
|
||||
// * manually write and inspect link, network, and transport headers
|
||||
// * receive all traffic of a given network protocol, or all protocols
|
||||
//
|
||||
// Packet sockets are similar to raw sockets, but provide even more power to
|
||||
// users, letting them effectively talk directly to the network device.
|
||||
//
|
||||
// Packet sockets skip the input and output iptables chains.
|
||||
package packet
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/iptables"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/waiter"
|
||||
)
|
||||
|
||||
// +stateify savable
|
||||
type packet struct {
|
||||
packetEntry
|
||||
// data holds the actual packet data, including any headers and
|
||||
// payload.
|
||||
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
|
||||
// views is pre-allocated space to back data. As long as the packet is
|
||||
// made up of fewer than 8 buffer.Views, no extra allocation is
|
||||
// necessary to store packet data.
|
||||
views [8]buffer.View `state:"nosave"`
|
||||
// timestampNS is the unix time at which the packet was received.
|
||||
timestampNS int64
|
||||
// senderAddr is the network address of the sender.
|
||||
senderAddr tcpip.FullAddress
|
||||
}
|
||||
|
||||
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
|
||||
// to have goroutines make concurrent calls into the endpoint.
|
||||
//
|
||||
// Lock order:
|
||||
// endpoint.mu
|
||||
// endpoint.rcvMu
|
||||
//
|
||||
// +stateify savable
|
||||
type endpoint struct {
|
||||
stack.TransportEndpointInfo
|
||||
// The following fields are initialized at creation time and are
|
||||
// immutable.
|
||||
stack *stack.Stack `state:"manual"`
|
||||
netProto tcpip.NetworkProtocolNumber
|
||||
waiterQueue *waiter.Queue
|
||||
cooked bool
|
||||
|
||||
// The following fields are used to manage the receive queue and are
|
||||
// protected by rcvMu.
|
||||
rcvMu sync.Mutex `state:"nosave"`
|
||||
rcvList packetList
|
||||
rcvBufSizeMax int `state:".(int)"`
|
||||
rcvBufSize int
|
||||
rcvClosed bool
|
||||
|
||||
// The following fields are protected by mu.
|
||||
mu sync.RWMutex `state:"nosave"`
|
||||
sndBufSize int
|
||||
closed bool
|
||||
stats tcpip.TransportEndpointStats `state:"nosave"`
|
||||
}
|
||||
|
||||
// NewEndpoint returns a new packet endpoint.
|
||||
func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
ep := &endpoint{
|
||||
stack: s,
|
||||
TransportEndpointInfo: stack.TransportEndpointInfo{
|
||||
NetProto: netProto,
|
||||
},
|
||||
cooked: cooked,
|
||||
netProto: netProto,
|
||||
waiterQueue: waiterQueue,
|
||||
rcvBufSizeMax: 32 * 1024,
|
||||
sndBufSize: 32 * 1024,
|
||||
}
|
||||
|
||||
if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ep, nil
|
||||
}
|
||||
|
||||
// Close implements tcpip.Endpoint.Close.
|
||||
func (ep *endpoint) Close() {
|
||||
ep.mu.Lock()
|
||||
defer ep.mu.Unlock()
|
||||
|
||||
if ep.closed {
|
||||
return
|
||||
}
|
||||
|
||||
ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)
|
||||
|
||||
ep.rcvMu.Lock()
|
||||
defer ep.rcvMu.Unlock()
|
||||
|
||||
// Clear the receive list.
|
||||
ep.rcvClosed = true
|
||||
ep.rcvBufSize = 0
|
||||
for !ep.rcvList.Empty() {
|
||||
ep.rcvList.Remove(ep.rcvList.Front())
|
||||
}
|
||||
|
||||
ep.closed = true
|
||||
ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
|
||||
}
|
||||
|
||||
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
|
||||
func (ep *endpoint) ModerateRecvBuf(copied int) {}
|
||||
|
||||
// IPTables implements tcpip.Endpoint.IPTables.
|
||||
func (ep *endpoint) IPTables() (iptables.IPTables, error) {
|
||||
return ep.stack.IPTables(), nil
|
||||
}
|
||||
|
||||
// Read implements tcpip.Endpoint.Read.
|
||||
func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
|
||||
ep.rcvMu.Lock()
|
||||
|
||||
// If there's no data to read, return that read would block or that the
|
||||
// endpoint is closed.
|
||||
if ep.rcvList.Empty() {
|
||||
err := tcpip.ErrWouldBlock
|
||||
if ep.rcvClosed {
|
||||
ep.stats.ReadErrors.ReadClosed.Increment()
|
||||
err = tcpip.ErrClosedForReceive
|
||||
}
|
||||
ep.rcvMu.Unlock()
|
||||
return buffer.View{}, tcpip.ControlMessages{}, err
|
||||
}
|
||||
|
||||
packet := ep.rcvList.Front()
|
||||
ep.rcvList.Remove(packet)
|
||||
ep.rcvBufSize -= packet.data.Size()
|
||||
|
||||
ep.rcvMu.Unlock()
|
||||
|
||||
if addr != nil {
|
||||
*addr = packet.senderAddr
|
||||
}
|
||||
|
||||
return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil
|
||||
}
|
||||
|
||||
func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
|
||||
// TODO(b/129292371): Implement.
|
||||
return 0, nil, tcpip.ErrInvalidOptionValue
|
||||
}
|
||||
|
||||
// Peek implements tcpip.Endpoint.Peek.
|
||||
func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
|
||||
return 0, tcpip.ControlMessages{}, nil
|
||||
}
|
||||
|
||||
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
|
||||
// disconnected, and this function always returns tpcip.ErrNotSupported.
|
||||
func (*endpoint) Disconnect() *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
|
||||
// connected, and this function always returnes tcpip.ErrNotSupported.
|
||||
func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
|
||||
// with Shutdown, and this function always returns tcpip.ErrNotSupported.
|
||||
func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
|
||||
// Listen, and this function always returns tcpip.ErrNotSupported.
|
||||
func (ep *endpoint) Listen(backlog int) *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
|
||||
// Accept, and this function always returns tcpip.ErrNotSupported.
|
||||
func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
|
||||
return nil, nil, tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// Bind implements tcpip.Endpoint.Bind.
|
||||
func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
|
||||
// TODO(gvisor.dev/issue/173): Add Bind support.
|
||||
|
||||
// "By default, all packets of the specified protocol type are passed
|
||||
// to a packet socket. To get packets only from a specific interface
|
||||
// use bind(2) specifying an address in a struct sockaddr_ll to bind
|
||||
// the packet socket to an interface. Fields used for binding are
|
||||
// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
|
||||
// - packet(7).
|
||||
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
|
||||
func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
|
||||
return tcpip.FullAddress{}, tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress.
|
||||
func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
|
||||
// Even a connected socket doesn't return a remote address.
|
||||
return tcpip.FullAddress{}, tcpip.ErrNotConnected
|
||||
}
|
||||
|
||||
// Readiness implements tcpip.Endpoint.Readiness.
|
||||
func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
|
||||
// The endpoint is always writable.
|
||||
result := waiter.EventOut & mask
|
||||
|
||||
// Determine whether the endpoint is readable.
|
||||
if (mask & waiter.EventIn) != 0 {
|
||||
ep.rcvMu.Lock()
|
||||
if !ep.rcvList.Empty() || ep.rcvClosed {
|
||||
result |= waiter.EventIn
|
||||
}
|
||||
ep.rcvMu.Unlock()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
|
||||
// used with SetSockOpt, and this function always returns
|
||||
// tcpip.ErrNotSupported.
|
||||
func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
|
||||
func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
|
||||
return tcpip.ErrUnknownProtocolOption
|
||||
}
|
||||
|
||||
// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
|
||||
func (ep *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) {
|
||||
return 0, tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
|
||||
func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
|
||||
return tcpip.ErrNotSupported
|
||||
}
|
||||
|
||||
// HandlePacket implements stack.PacketEndpoint.HandlePacket.
|
||||
func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, ethHeader buffer.View) {
|
||||
ep.rcvMu.Lock()
|
||||
|
||||
// Drop the packet if our buffer is currently full.
|
||||
if ep.rcvClosed {
|
||||
ep.rcvMu.Unlock()
|
||||
ep.stack.Stats().DroppedPackets.Increment()
|
||||
ep.stats.ReceiveErrors.ClosedReceiver.Increment()
|
||||
return
|
||||
}
|
||||
|
||||
if ep.rcvBufSize >= ep.rcvBufSizeMax {
|
||||
ep.rcvMu.Unlock()
|
||||
ep.stack.Stats().DroppedPackets.Increment()
|
||||
ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
|
||||
return
|
||||
}
|
||||
|
||||
wasEmpty := ep.rcvBufSize == 0
|
||||
|
||||
// Push new packet into receive list and increment the buffer size.
|
||||
var packet packet
|
||||
// TODO(b/129292371): Return network protocol.
|
||||
if len(ethHeader) > 0 {
|
||||
// Get info directly from the ethernet header.
|
||||
hdr := header.Ethernet(ethHeader)
|
||||
packet.senderAddr = tcpip.FullAddress{
|
||||
NIC: nicid,
|
||||
Addr: tcpip.Address(hdr.SourceAddress()),
|
||||
}
|
||||
} else {
|
||||
// Guess the would-be ethernet header.
|
||||
packet.senderAddr = tcpip.FullAddress{
|
||||
NIC: nicid,
|
||||
Addr: tcpip.Address(localAddr),
|
||||
}
|
||||
}
|
||||
|
||||
if ep.cooked {
|
||||
// Cooked packets can simply be queued.
|
||||
packet.data = vv.Clone(packet.views[:])
|
||||
} else {
|
||||
// Raw packets need their ethernet headers prepended before
|
||||
// queueing.
|
||||
if len(ethHeader) == 0 {
|
||||
// We weren't provided with an actual ethernet header,
|
||||
// so fake one.
|
||||
ethFields := header.EthernetFields{
|
||||
SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
|
||||
DstAddr: localAddr,
|
||||
Type: netProto,
|
||||
}
|
||||
fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
|
||||
fakeHeader.Encode(ðFields)
|
||||
ethHeader = buffer.View(fakeHeader)
|
||||
}
|
||||
combinedVV := buffer.View(ethHeader).ToVectorisedView()
|
||||
combinedVV.Append(vv)
|
||||
packet.data = combinedVV.Clone(packet.views[:])
|
||||
}
|
||||
packet.timestampNS = ep.stack.NowNanoseconds()
|
||||
|
||||
ep.rcvList.PushBack(&packet)
|
||||
ep.rcvBufSize += packet.data.Size()
|
||||
|
||||
ep.rcvMu.Unlock()
|
||||
ep.stats.PacketsReceived.Increment()
|
||||
// Notify waiters that there's data to be read.
|
||||
if wasEmpty {
|
||||
ep.waiterQueue.Notify(waiter.EventIn)
|
||||
}
|
||||
}
|
||||
|
||||
// State implements socket.Socket.State.
|
||||
func (ep *endpoint) State() uint32 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Info returns a copy of the endpoint info.
|
||||
func (ep *endpoint) Info() tcpip.EndpointInfo {
|
||||
ep.mu.RLock()
|
||||
// Make a copy of the endpoint info.
|
||||
ret := ep.TransportEndpointInfo
|
||||
ep.mu.RUnlock()
|
||||
return &ret
|
||||
}
|
||||
|
||||
// Stats returns a pointer to the endpoint stats.
|
||||
func (ep *endpoint) Stats() tcpip.EndpointStats {
|
||||
return &ep.stats
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright 2018 The gVisor Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package packet
|
||||
|
||||
import (
|
||||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
)
|
||||
|
||||
// saveData saves packet.data field.
|
||||
func (p *packet) saveData() buffer.VectorisedView {
|
||||
// We cannot save p.data directly as p.data.views may alias to p.views,
|
||||
// which is not allowed by state framework (in-struct pointer).
|
||||
return p.data.Clone(nil)
|
||||
}
|
||||
|
||||
// loadData loads packet.data field.
|
||||
func (p *packet) loadData(data buffer.VectorisedView) {
|
||||
// NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
|
||||
// here because data.views is not guaranteed to be loaded by now. Plus,
|
||||
// data.views will be allocated anyway so there really is little point
|
||||
// of utilizing p.views for data.views.
|
||||
p.data = data
|
||||
}
|
||||
|
||||
// beforeSave is invoked by stateify.
|
||||
func (ep *endpoint) beforeSave() {
|
||||
// Stop incoming packets from being handled (and mutate endpoint state).
|
||||
// The lock will be released after saveRcvBufSizeMax(), which would have
|
||||
// saved ep.rcvBufSizeMax and set it to 0 to continue blocking incoming
|
||||
// packets.
|
||||
ep.rcvMu.Lock()
|
||||
}
|
||||
|
||||
// saveRcvBufSizeMax is invoked by stateify.
|
||||
func (ep *endpoint) saveRcvBufSizeMax() int {
|
||||
max := ep.rcvBufSizeMax
|
||||
// Make sure no new packets will be handled regardless of the lock.
|
||||
ep.rcvBufSizeMax = 0
|
||||
// Release the lock acquired in beforeSave() so regular endpoint closing
|
||||
// logic can proceed after save.
|
||||
ep.rcvMu.Unlock()
|
||||
return max
|
||||
}
|
||||
|
||||
// loadRcvBufSizeMax is invoked by stateify.
|
||||
func (ep *endpoint) loadRcvBufSizeMax(max int) {
|
||||
ep.rcvBufSizeMax = max
|
||||
}
|
||||
|
||||
// afterLoad is invoked by stateify.
|
||||
func (ep *endpoint) afterLoad() {
|
||||
// StackFromEnv is a stack used specifically for save/restore.
|
||||
ep.stack = stack.StackFromEnv
|
||||
|
||||
// TODO(gvisor.dev/173): Once bind is supported, choose the right NIC.
|
||||
if err := ep.stack.RegisterPacketEndpoint(0, ep.netProto, ep); err != nil {
|
||||
panic(*err)
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package raw
|
||||
package packet
|
||||
|
||||
// ElementMapper provides an identity mapping by default.
|
||||
//
|
|
@ -0,0 +1,88 @@
|
|||
// automatically generated by stateify.
|
||||
|
||||
package packet
|
||||
|
||||
import (
|
||||
"gvisor.dev/gvisor/pkg/state"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
)
|
||||
|
||||
func (x *packet) beforeSave() {}
|
||||
func (x *packet) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
var data buffer.VectorisedView = x.saveData()
|
||||
m.SaveValue("data", data)
|
||||
m.Save("packetEntry", &x.packetEntry)
|
||||
m.Save("timestampNS", &x.timestampNS)
|
||||
m.Save("senderAddr", &x.senderAddr)
|
||||
}
|
||||
|
||||
func (x *packet) afterLoad() {}
|
||||
func (x *packet) load(m state.Map) {
|
||||
m.Load("packetEntry", &x.packetEntry)
|
||||
m.Load("timestampNS", &x.timestampNS)
|
||||
m.Load("senderAddr", &x.senderAddr)
|
||||
m.LoadValue("data", new(buffer.VectorisedView), func(y interface{}) { x.loadData(y.(buffer.VectorisedView)) })
|
||||
}
|
||||
|
||||
func (x *endpoint) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
var rcvBufSizeMax int = x.saveRcvBufSizeMax()
|
||||
m.SaveValue("rcvBufSizeMax", rcvBufSizeMax)
|
||||
m.Save("TransportEndpointInfo", &x.TransportEndpointInfo)
|
||||
m.Save("netProto", &x.netProto)
|
||||
m.Save("waiterQueue", &x.waiterQueue)
|
||||
m.Save("cooked", &x.cooked)
|
||||
m.Save("rcvList", &x.rcvList)
|
||||
m.Save("rcvBufSize", &x.rcvBufSize)
|
||||
m.Save("rcvClosed", &x.rcvClosed)
|
||||
m.Save("sndBufSize", &x.sndBufSize)
|
||||
m.Save("closed", &x.closed)
|
||||
}
|
||||
|
||||
func (x *endpoint) load(m state.Map) {
|
||||
m.Load("TransportEndpointInfo", &x.TransportEndpointInfo)
|
||||
m.Load("netProto", &x.netProto)
|
||||
m.Load("waiterQueue", &x.waiterQueue)
|
||||
m.Load("cooked", &x.cooked)
|
||||
m.Load("rcvList", &x.rcvList)
|
||||
m.Load("rcvBufSize", &x.rcvBufSize)
|
||||
m.Load("rcvClosed", &x.rcvClosed)
|
||||
m.Load("sndBufSize", &x.sndBufSize)
|
||||
m.Load("closed", &x.closed)
|
||||
m.LoadValue("rcvBufSizeMax", new(int), func(y interface{}) { x.loadRcvBufSizeMax(y.(int)) })
|
||||
m.AfterLoad(x.afterLoad)
|
||||
}
|
||||
|
||||
func (x *packetList) beforeSave() {}
|
||||
func (x *packetList) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
m.Save("head", &x.head)
|
||||
m.Save("tail", &x.tail)
|
||||
}
|
||||
|
||||
func (x *packetList) afterLoad() {}
|
||||
func (x *packetList) load(m state.Map) {
|
||||
m.Load("head", &x.head)
|
||||
m.Load("tail", &x.tail)
|
||||
}
|
||||
|
||||
func (x *packetEntry) beforeSave() {}
|
||||
func (x *packetEntry) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
m.Save("next", &x.next)
|
||||
m.Save("prev", &x.prev)
|
||||
}
|
||||
|
||||
func (x *packetEntry) afterLoad() {}
|
||||
func (x *packetEntry) load(m state.Map) {
|
||||
m.Load("next", &x.next)
|
||||
m.Load("prev", &x.prev)
|
||||
}
|
||||
|
||||
func init() {
|
||||
state.Register("packet.packet", (*packet)(nil), state.Fns{Save: (*packet).save, Load: (*packet).load})
|
||||
state.Register("packet.endpoint", (*endpoint)(nil), state.Fns{Save: (*endpoint).save, Load: (*endpoint).load})
|
||||
state.Register("packet.packetList", (*packetList)(nil), state.Fns{Save: (*packetList).save, Load: (*packetList).load})
|
||||
state.Register("packet.packetEntry", (*packetEntry)(nil), state.Fns{Save: (*packetEntry).save, Load: (*packetEntry).load})
|
||||
}
|
|
@ -17,8 +17,7 @@
|
|||
//
|
||||
// * manually write and inspect transport layer headers and payloads
|
||||
// * receive all traffic of a given transport protocol (e.g. ICMP or UDP)
|
||||
// * optionally write and inspect network layer and link layer headers for
|
||||
// packets
|
||||
// * optionally write and inspect network layer headers of packets
|
||||
//
|
||||
// Raw sockets don't have any notion of ports, and incoming packets are
|
||||
// demultiplexed solely by protocol number. Thus, a raw UDP endpoint will
|
||||
|
@ -38,8 +37,8 @@ import (
|
|||
)
|
||||
|
||||
// +stateify savable
|
||||
type packet struct {
|
||||
packetEntry
|
||||
type rawPacket struct {
|
||||
rawPacketEntry
|
||||
// data holds the actual packet data, including any headers and
|
||||
// payload.
|
||||
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
|
||||
|
@ -72,7 +71,7 @@ type endpoint struct {
|
|||
// The following fields are used to manage the receive queue and are
|
||||
// protected by rcvMu.
|
||||
rcvMu sync.Mutex `state:"nosave"`
|
||||
rcvList packetList
|
||||
rcvList rawPacketList
|
||||
rcvBufSizeMax int `state:".(int)"`
|
||||
rcvBufSize int
|
||||
rcvClosed bool
|
||||
|
@ -90,7 +89,6 @@ type endpoint struct {
|
|||
}
|
||||
|
||||
// NewEndpoint returns a raw endpoint for the given protocols.
|
||||
// TODO(b/129292371): IP_HDRINCL and AF_PACKET.
|
||||
func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */)
|
||||
}
|
||||
|
@ -187,17 +185,17 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
|
|||
return buffer.View{}, tcpip.ControlMessages{}, err
|
||||
}
|
||||
|
||||
packet := e.rcvList.Front()
|
||||
e.rcvList.Remove(packet)
|
||||
e.rcvBufSize -= packet.data.Size()
|
||||
pkt := e.rcvList.Front()
|
||||
e.rcvList.Remove(pkt)
|
||||
e.rcvBufSize -= pkt.data.Size()
|
||||
|
||||
e.rcvMu.Unlock()
|
||||
|
||||
if addr != nil {
|
||||
*addr = packet.senderAddr
|
||||
*addr = pkt.senderAddr
|
||||
}
|
||||
|
||||
return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil
|
||||
return pkt.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: pkt.timestampNS}, nil
|
||||
}
|
||||
|
||||
// Write implements tcpip.Endpoint.Write.
|
||||
|
@ -602,7 +600,7 @@ func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv bu
|
|||
wasEmpty := e.rcvBufSize == 0
|
||||
|
||||
// Push new packet into receive list and increment the buffer size.
|
||||
packet := &packet{
|
||||
pkt := &rawPacket{
|
||||
senderAddr: tcpip.FullAddress{
|
||||
NIC: route.NICID(),
|
||||
Addr: route.RemoteAddress,
|
||||
|
@ -611,11 +609,11 @@ func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv bu
|
|||
|
||||
combinedVV := netHeader.ToVectorisedView()
|
||||
combinedVV.Append(vv)
|
||||
packet.data = combinedVV.Clone(packet.views[:])
|
||||
packet.timestampNS = e.stack.NowNanoseconds()
|
||||
pkt.data = combinedVV.Clone(pkt.views[:])
|
||||
pkt.timestampNS = e.stack.NowNanoseconds()
|
||||
|
||||
e.rcvList.PushBack(packet)
|
||||
e.rcvBufSize += packet.data.Size()
|
||||
e.rcvList.PushBack(pkt)
|
||||
e.rcvBufSize += pkt.data.Size()
|
||||
|
||||
e.rcvMu.Unlock()
|
||||
e.stats.PacketsReceived.Increment()
|
||||
|
|
|
@ -20,15 +20,15 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
)
|
||||
|
||||
// saveData saves packet.data field.
|
||||
func (p *packet) saveData() buffer.VectorisedView {
|
||||
// saveData saves rawPacket.data field.
|
||||
func (p *rawPacket) saveData() buffer.VectorisedView {
|
||||
// We cannot save p.data directly as p.data.views may alias to p.views,
|
||||
// which is not allowed by state framework (in-struct pointer).
|
||||
return p.data.Clone(nil)
|
||||
}
|
||||
|
||||
// loadData loads packet.data field.
|
||||
func (p *packet) loadData(data buffer.VectorisedView) {
|
||||
// loadData loads rawPacket.data field.
|
||||
func (p *rawPacket) loadData(data buffer.VectorisedView) {
|
||||
// NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
|
||||
// here because data.views is not guaranteed to be loaded by now. Plus,
|
||||
// data.views will be allocated anyway so there really is little point
|
||||
|
@ -86,7 +86,9 @@ func (ep *endpoint) Resume(s *stack.Stack) {
|
|||
}
|
||||
}
|
||||
|
||||
if ep.associated {
|
||||
if err := ep.stack.RegisterRawTransportEndpoint(ep.RegisterNICID, ep.NetProto, ep.TransProto, ep); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,13 +17,19 @@ package raw
|
|||
import (
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/transport/packet"
|
||||
"gvisor.dev/gvisor/pkg/waiter"
|
||||
)
|
||||
|
||||
// EndpointFactory implements stack.UnassociatedEndpointFactory.
|
||||
// EndpointFactory implements stack.RawFactory.
|
||||
type EndpointFactory struct{}
|
||||
|
||||
// NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory.
|
||||
func (EndpointFactory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
// NewUnassociatedEndpoint implements stack.RawFactory.NewUnassociatedEndpoint.
|
||||
func (EndpointFactory) NewUnassociatedEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */)
|
||||
}
|
||||
|
||||
// NewPacketEndpoint implements stack.RawFactory.NewPacketEndpoint.
|
||||
func (EndpointFactory) NewPacketEndpoint(stack *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
|
||||
return packet.NewEndpoint(stack, cooked, netProto, waiterQueue)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
package raw
|
||||
|
||||
// rawPacketElementMapper provides an identity mapping by default.
|
||||
//
|
||||
// This can be replaced to provide a struct that maps elements to linker
|
||||
// objects, if they are not the same. A custom mapper is not typically
|
||||
// required if the linker type is left as is, the element type is left as is,
|
||||
// or the linker and element are the same type (as here: both are *rawPacket).
|
||||
type rawPacketElementMapper struct{}
|
||||
|
||||
// linkerFor maps an element to its linker; here it returns elem unchanged.
|
||||
//
|
||||
// This default (identity) implementation should be inlined.
|
||||
//
|
||||
//go:nosplit
|
||||
func (rawPacketElementMapper) linkerFor(elem *rawPacket) *rawPacket { return elem }
|
||||
|
||||
// rawPacketList is an intrusive list of *rawPacket. Entries can be added to or
|
||||
// removed from the list in O(1) time and with no additional memory allocations.
|
||||
//
|
||||
// The zero value for rawPacketList is an empty list ready to use.
|
||||
//
|
||||
// To iterate over a list (where l is a rawPacketList):
|
||||
//      for e := l.Front(); e != nil; e = e.Next() {
|
||||
//              // do something with e.
|
||||
//      }
|
||||
//
|
||||
// +stateify savable
|
||||
type rawPacketList struct {
|
||||
head *rawPacket
|
||||
tail *rawPacket
|
||||
}
|
||||
|
||||
// Reset resets list l to the empty state by clearing its head and tail.
// The links stored in the elements themselves are not modified.
|
||||
func (l *rawPacketList) Reset() {
|
||||
l.head = nil
|
||||
l.tail = nil
|
||||
}
|
||||
|
||||
// Empty returns true iff the list is empty, i.e. it has no head element.
|
||||
func (l *rawPacketList) Empty() bool {
|
||||
return l.head == nil
|
||||
}
|
||||
|
||||
// Front returns the first element (head) of list l, or nil if l is empty.
|
||||
func (l *rawPacketList) Front() *rawPacket {
|
||||
return l.head
|
||||
}
|
||||
|
||||
// Back returns the last element (tail) of list l, or nil if none is set.
|
||||
func (l *rawPacketList) Back() *rawPacket {
|
||||
return l.tail
|
||||
}
|
||||
|
||||
// PushFront inserts the element e at the front of list l, making it the new
// head. e's next/prev links are overwritten.
|
||||
func (l *rawPacketList) PushFront(e *rawPacket) {
|
||||
rawPacketElementMapper{}.linkerFor(e).SetNext(l.head)
|
||||
rawPacketElementMapper{}.linkerFor(e).SetPrev(nil)
|
||||
|
||||
if l.head != nil {
|
||||
rawPacketElementMapper{}.linkerFor(l.head).SetPrev(e)
|
||||
} else {
|
||||
// No previous head: e is the only element, so it is also the tail.
l.tail = e
|
||||
}
|
||||
|
||||
l.head = e
|
||||
}
|
||||
|
||||
// PushBack inserts the element e at the back of list l, making it the new
// tail. e's next/prev links are overwritten.
|
||||
func (l *rawPacketList) PushBack(e *rawPacket) {
|
||||
rawPacketElementMapper{}.linkerFor(e).SetNext(nil)
|
||||
rawPacketElementMapper{}.linkerFor(e).SetPrev(l.tail)
|
||||
|
||||
if l.tail != nil {
|
||||
rawPacketElementMapper{}.linkerFor(l.tail).SetNext(e)
|
||||
} else {
|
||||
// No previous tail: e is the only element, so it is also the head.
l.head = e
|
||||
}
|
||||
|
||||
l.tail = e
|
||||
}
|
||||
|
||||
// PushBackList inserts list m at the end of list l, emptying m.
//
// If l has no head it simply adopts m's head and tail; otherwise, when m is
// non-empty, m's head is linked after l's tail. m's head and tail are always
// cleared afterwards.
|
||||
func (l *rawPacketList) PushBackList(m *rawPacketList) {
|
||||
if l.head == nil {
|
||||
l.head = m.head
|
||||
l.tail = m.tail
|
||||
} else if m.head != nil {
|
||||
rawPacketElementMapper{}.linkerFor(l.tail).SetNext(m.head)
|
||||
rawPacketElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
|
||||
|
||||
l.tail = m.tail
|
||||
}
|
||||
|
||||
m.head = nil
|
||||
m.tail = nil
|
||||
}
|
||||
|
||||
// InsertAfter inserts e immediately after b. If b had no successor, e becomes
// the new tail of l.
//
// NOTE(review): b is presumably already an element of l; nothing here checks it.
|
||||
func (l *rawPacketList) InsertAfter(b, e *rawPacket) {
|
||||
a := rawPacketElementMapper{}.linkerFor(b).Next()
|
||||
rawPacketElementMapper{}.linkerFor(e).SetNext(a)
|
||||
rawPacketElementMapper{}.linkerFor(e).SetPrev(b)
|
||||
rawPacketElementMapper{}.linkerFor(b).SetNext(e)
|
||||
|
||||
if a != nil {
|
||||
rawPacketElementMapper{}.linkerFor(a).SetPrev(e)
|
||||
} else {
|
||||
l.tail = e
|
||||
}
|
||||
}
|
||||
|
||||
// InsertBefore inserts e immediately before a. If a had no predecessor, e
// becomes the new head of l.
//
// NOTE(review): a is presumably already an element of l; nothing here checks it.
|
||||
func (l *rawPacketList) InsertBefore(a, e *rawPacket) {
|
||||
b := rawPacketElementMapper{}.linkerFor(a).Prev()
|
||||
rawPacketElementMapper{}.linkerFor(e).SetNext(a)
|
||||
rawPacketElementMapper{}.linkerFor(e).SetPrev(b)
|
||||
rawPacketElementMapper{}.linkerFor(a).SetPrev(e)
|
||||
|
||||
if b != nil {
|
||||
rawPacketElementMapper{}.linkerFor(b).SetNext(e)
|
||||
} else {
|
||||
l.head = e
|
||||
}
|
||||
}
|
||||
|
||||
// Remove removes e from l by linking e's neighbours to each other and moving
// l's head/tail when e had no predecessor/successor.
//
// e's own next and prev links are not cleared by this function.
// NOTE(review): assumes e is currently an element of l; this is not checked.
|
||||
func (l *rawPacketList) Remove(e *rawPacket) {
|
||||
prev := rawPacketElementMapper{}.linkerFor(e).Prev()
|
||||
next := rawPacketElementMapper{}.linkerFor(e).Next()
|
||||
|
||||
if prev != nil {
|
||||
rawPacketElementMapper{}.linkerFor(prev).SetNext(next)
|
||||
} else {
|
||||
l.head = next
|
||||
}
|
||||
|
||||
if next != nil {
|
||||
rawPacketElementMapper{}.linkerFor(next).SetPrev(prev)
|
||||
} else {
|
||||
l.tail = prev
|
||||
}
|
||||
}
|
||||
|
||||
// rawPacketEntry is a default implementation of the linker. Users can add
|
||||
// anonymous fields of this type to their structs to make them automatically
|
||||
// implement the methods needed by rawPacketList.
|
||||
//
|
||||
// +stateify savable
|
||||
type rawPacketEntry struct {
|
||||
next *rawPacket
|
||||
prev *rawPacket
|
||||
}
|
||||
|
||||
// Next returns the entry that follows e in the list (the stored next link).
|
||||
func (e *rawPacketEntry) Next() *rawPacket {
|
||||
return e.next
|
||||
}
|
||||
|
||||
// Prev returns the entry that precedes e in the list (the stored prev link).
|
||||
func (e *rawPacketEntry) Prev() *rawPacket {
|
||||
return e.prev
|
||||
}
|
||||
|
||||
// SetNext assigns elem as the entry that follows e in the list.
|
||||
func (e *rawPacketEntry) SetNext(elem *rawPacket) {
|
||||
e.next = elem
|
||||
}
|
||||
|
||||
// SetPrev assigns elem as the entry that precedes e in the list.
|
||||
func (e *rawPacketEntry) SetPrev(elem *rawPacket) {
|
||||
e.prev = elem
|
||||
}
|
|
@ -7,19 +7,19 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
||||
)
|
||||
|
||||
func (x *packet) beforeSave() {}
|
||||
func (x *packet) save(m state.Map) {
|
||||
func (x *rawPacket) beforeSave() {}
|
||||
func (x *rawPacket) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
var data buffer.VectorisedView = x.saveData()
|
||||
m.SaveValue("data", data)
|
||||
m.Save("packetEntry", &x.packetEntry)
|
||||
m.Save("rawPacketEntry", &x.rawPacketEntry)
|
||||
m.Save("timestampNS", &x.timestampNS)
|
||||
m.Save("senderAddr", &x.senderAddr)
|
||||
}
|
||||
|
||||
func (x *packet) afterLoad() {}
|
||||
func (x *packet) load(m state.Map) {
|
||||
m.Load("packetEntry", &x.packetEntry)
|
||||
func (x *rawPacket) afterLoad() {}
|
||||
func (x *rawPacket) load(m state.Map) {
|
||||
m.Load("rawPacketEntry", &x.rawPacketEntry)
|
||||
m.Load("timestampNS", &x.timestampNS)
|
||||
m.Load("senderAddr", &x.senderAddr)
|
||||
m.LoadValue("data", new(buffer.VectorisedView), func(y interface{}) { x.loadData(y.(buffer.VectorisedView)) })
|
||||
|
@ -56,35 +56,35 @@ func (x *endpoint) load(m state.Map) {
|
|||
m.AfterLoad(x.afterLoad)
|
||||
}
|
||||
|
||||
func (x *packetList) beforeSave() {}
|
||||
func (x *packetList) save(m state.Map) {
|
||||
func (x *rawPacketList) beforeSave() {}
|
||||
func (x *rawPacketList) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
m.Save("head", &x.head)
|
||||
m.Save("tail", &x.tail)
|
||||
}
|
||||
|
||||
func (x *packetList) afterLoad() {}
|
||||
func (x *packetList) load(m state.Map) {
|
||||
func (x *rawPacketList) afterLoad() {}
|
||||
func (x *rawPacketList) load(m state.Map) {
|
||||
m.Load("head", &x.head)
|
||||
m.Load("tail", &x.tail)
|
||||
}
|
||||
|
||||
func (x *packetEntry) beforeSave() {}
|
||||
func (x *packetEntry) save(m state.Map) {
|
||||
func (x *rawPacketEntry) beforeSave() {}
|
||||
func (x *rawPacketEntry) save(m state.Map) {
|
||||
x.beforeSave()
|
||||
m.Save("next", &x.next)
|
||||
m.Save("prev", &x.prev)
|
||||
}
|
||||
|
||||
func (x *packetEntry) afterLoad() {}
|
||||
func (x *packetEntry) load(m state.Map) {
|
||||
func (x *rawPacketEntry) afterLoad() {}
|
||||
func (x *rawPacketEntry) load(m state.Map) {
|
||||
m.Load("next", &x.next)
|
||||
m.Load("prev", &x.prev)
|
||||
}
|
||||
|
||||
func init() {
|
||||
state.Register("raw.packet", (*packet)(nil), state.Fns{Save: (*packet).save, Load: (*packet).load})
|
||||
state.Register("raw.rawPacket", (*rawPacket)(nil), state.Fns{Save: (*rawPacket).save, Load: (*rawPacket).load})
|
||||
state.Register("raw.endpoint", (*endpoint)(nil), state.Fns{Save: (*endpoint).save, Load: (*endpoint).load})
|
||||
state.Register("raw.packetList", (*packetList)(nil), state.Fns{Save: (*packetList).save, Load: (*packetList).load})
|
||||
state.Register("raw.packetEntry", (*packetEntry)(nil), state.Fns{Save: (*packetEntry).save, Load: (*packetEntry).load})
|
||||
state.Register("raw.rawPacketList", (*rawPacketList)(nil), state.Fns{Save: (*rawPacketList).save, Load: (*rawPacketList).load})
|
||||
state.Register("raw.rawPacketEntry", (*rawPacketEntry)(nil), state.Fns{Save: (*rawPacketEntry).save, Load: (*rawPacketEntry).load})
|
||||
}
|
||||
|
|
|
@ -121,8 +121,15 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
|
|||
payloadLen = available
|
||||
}
|
||||
|
||||
payload := buffer.NewVectorisedView(len(netHeader), []buffer.View{netHeader})
|
||||
payload.Append(vv)
|
||||
// The buffers used by vv and netHeader may be used elsewhere
|
||||
// in the system. For example, a raw or packet socket may use
|
||||
// what UDP considers an unreachable destination. Thus we deep
|
||||
// copy vv and netHeader to prevent multiple ownership and
|
||||
// save/restore (S/R) errors.
|
||||
newNetHeader := make(buffer.View, len(netHeader))
|
||||
copy(newNetHeader, netHeader)
|
||||
payload := buffer.NewVectorisedView(len(newNetHeader), []buffer.View{newNetHeader})
|
||||
payload.Append(vv.ToView().ToVectorisedView())
|
||||
payload.CapLength(payloadLen)
|
||||
|
||||
hdr := buffer.NewPrependable(headerLen)
|
||||
|
|
|
@ -922,7 +922,7 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
|
|||
HandleLocal: true,
|
||||
// Enable raw sockets for users with sufficient
|
||||
// privileges.
|
||||
UnassociatedFactory: raw.EndpointFactory{},
|
||||
RawFactory: raw.EndpointFactory{},
|
||||
})}
|
||||
|
||||
// Enable SACK Recovery.
|
||||
|
|
Loading…
Reference in New Issue