Merge release-20190806.1-295-g12235d5 (automated)

This commit is contained in:
gVisor bot 2019-10-21 20:26:03 +00:00
commit 975132cced
26 changed files with 1209 additions and 116 deletions

View File

@ -256,6 +256,17 @@ type SockAddrInet6 struct {
Scope_id uint32
}
// SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h.
//
// It is the socket address format used with AF_PACKET sockets.
type SockAddrLink struct {
// Family is the address family; AF_PACKET for a valid link address.
Family uint16
// Protocol presumably mirrors sll_protocol (the link layer protocol
// number) -- TODO confirm byte order against packet(7).
Protocol uint16
// InterfaceIndex identifies the network interface the address refers to.
InterfaceIndex int32
// ARPHardwareType presumably mirrors sll_hatype -- TODO confirm.
ARPHardwareType uint16
// PacketType presumably mirrors sll_pkttype -- TODO confirm.
PacketType byte
// HardwareAddrLen is the number of meaningful bytes in HardwareAddr.
HardwareAddrLen byte
// HardwareAddr holds the link layer address; only the first
// HardwareAddrLen bytes are significant.
HardwareAddr [8]byte
}
// UnixPathMax is the maximum length of the path in an AF_UNIX socket.
//
// From uapi/linux/un.h.
@ -278,6 +289,7 @@ type SockAddr interface {
func (s *SockAddrInet) implementsSockAddr() {}
func (s *SockAddrInet6) implementsSockAddr() {}
func (s *SockAddrLink) implementsSockAddr() {}
func (s *SockAddrUnix) implementsSockAddr() {}
func (s *SockAddrNetlink) implementsSockAddr() {}

View File

@ -53,6 +53,7 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
@ -296,6 +297,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue
var sockAddrInetSize = int(binary.Size(linux.SockAddrInet{}))
var sockAddrInet6Size = int(binary.Size(linux.SockAddrInet6{}))
var sockAddrLinkSize = int(binary.Size(linux.SockAddrLink{}))
// bytesToIPAddress converts an IPv4 or IPv6 address from the user to the
// netstack representation taking any addresses into account.
@ -307,12 +309,12 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
}
// AddressAndFamily reads an sockaddr struct from the given address and
// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
// AF_INET6 addresses.
// converts it to the FullAddress format. It supports AF_UNIX, AF_INET,
// AF_INET6, and AF_PACKET addresses.
//
// strict indicates whether addresses with the AF_UNSPEC family are accepted or not.
//
// AddressAndFamily returns an address, its family.
// AddressAndFamily returns an address and its family.
func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
// Make sure we have at least 2 bytes for the address family.
if len(addr) < 2 {
@ -371,6 +373,22 @@ func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress,
}
return out, family, nil
case linux.AF_PACKET:
var a linux.SockAddrLink
if len(addr) < sockAddrLinkSize {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrLinkSize], usermem.ByteOrder, &a)
if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
// TODO(b/129292371): Return protocol too.
return tcpip.FullAddress{
NIC: tcpip.NICID(a.InterfaceIndex),
Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]),
}, family, nil
case linux.AF_UNSPEC:
return tcpip.FullAddress{}, family, nil
@ -1951,12 +1969,14 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32)
return &out, uint32(2 + l)
}
return &out, uint32(3 + l)
case linux.AF_INET:
var out linux.SockAddrInet
copy(out.Addr[:], addr.Addr)
out.Family = linux.AF_INET
out.Port = htons(addr.Port)
return &out, uint32(binary.Size(out))
return &out, uint32(sockAddrInetSize)
case linux.AF_INET6:
var out linux.SockAddrInet6
if len(addr.Addr) == 4 {
@ -1972,7 +1992,17 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32)
if isLinkLocal(addr.Addr) {
out.Scope_id = uint32(addr.NIC)
}
return &out, uint32(binary.Size(out))
return &out, uint32(sockAddrInet6Size)
case linux.AF_PACKET:
// TODO(b/129292371): Return protocol too.
var out linux.SockAddrLink
out.Family = linux.AF_PACKET
out.InterfaceIndex = int32(addr.NIC)
out.HardwareAddrLen = header.EthernetAddressSize
copy(out.HardwareAddr[:], addr.Addr)
return &out, uint32(sockAddrLinkSize)
default:
return nil, 0
}

View File

@ -62,6 +62,10 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
}
case linux.SOCK_RAW:
// TODO(b/142504697): "In order to create a raw socket, a
// process must have the CAP_NET_RAW capability in the user
// namespace that governs its network namespace." - raw(7)
// Raw sockets require CAP_NET_RAW.
creds := auth.CredentialsFromContext(ctx)
if !creds.HasCapability(linux.CAP_NET_RAW) {
@ -85,7 +89,8 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
return 0, true, syserr.ErrProtocolNotSupported
}
// Socket creates a new socket object for the AF_INET or AF_INET6 family.
// Socket creates a new socket object for the AF_INET, AF_INET6, or AF_PACKET
// family.
func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
// Fail right away if we don't have a stack.
stack := t.NetworkContext()
@ -99,6 +104,12 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
return nil, nil
}
// Packet sockets are handled separately, since they are neither INET
// nor INET6 specific.
if p.family == linux.AF_PACKET {
return packetSocket(t, eps, stype, protocol)
}
// Figure out the transport protocol.
transProto, associated, err := getTransportProtocol(t, stype, protocol)
if err != nil {
@ -121,12 +132,47 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
return New(t, p.family, stype, int(transProto), wq, ep)
}
// packetSocket creates an AF_PACKET socket of type stype backed by a packet
// endpoint from epStack.
//
// SOCK_DGRAM produces a "cooked" endpoint and SOCK_RAW a raw one; every other
// socket type fails with ErrProtocolNotSupported. Callers lacking CAP_NET_RAW
// get ErrNotPermitted. protocol is expected in network byte order.
func packetSocket(t *kernel.Task, epStack *Stack, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
	// TODO(b/142504697): "In order to create a packet socket, a process
	// must have the CAP_NET_RAW capability in the user namespace that
	// governs its network namespace." - packet(7)

	// Creating a packet socket is a privileged operation.
	if !auth.CredentialsFromContext(t).HasCapability(linux.CAP_NET_RAW) {
		return nil, syserr.ErrNotPermitted
	}

	// Only datagram and raw packet sockets exist.
	if stype != linux.SOCK_DGRAM && stype != linux.SOCK_RAW {
		return nil, syserr.ErrProtocolNotSupported
	}
	// Cooked (SOCK_DGRAM) packets carry no link layer information.
	cooked := stype == linux.SOCK_DGRAM

	// The application hands us the protocol in network byte order, while
	// netstack expects host order.
	hostOrderProto := ntohs(uint16(protocol))

	queue := new(waiter.Queue)
	ep, err := epStack.Stack.NewPacketEndpoint(cooked, tcpip.NetworkProtocolNumber(hostOrderProto), queue)
	if err != nil {
		return nil, syserr.TranslateNetstackError(err)
	}

	return New(t, linux.AF_PACKET, stype, protocol, queue, ep)
}
// Pair just returns nil sockets (not supported).
func (*provider) Pair(*kernel.Task, linux.SockType, int) (*fs.File, *fs.File, *syserr.Error) {
return nil, nil, nil
}
// init registers socket providers for AF_INET and AF_INET6.
// init registers socket providers for AF_INET, AF_INET6, and AF_PACKET.
func init() {
// Providers backed by netstack.
p := []provider{
@ -138,6 +184,9 @@ func init() {
family: linux.AF_INET6,
netProto: ipv6.ProtocolNumber,
},
{
family: linux.AF_PACKET,
},
}
for i := range p {

View File

@ -50,6 +50,24 @@ const (
EthernetAddressSize = 6
)
const (
// EthernetProtocolAll is a catch-all for all protocols carried inside
// an ethernet frame. It is mainly used to create packet sockets that
// capture all traffic.
EthernetProtocolAll tcpip.NetworkProtocolNumber = 0x0003
// EthernetProtocolPUP is the PARC Universal Packet protocol ethertype.
EthernetProtocolPUP tcpip.NetworkProtocolNumber = 0x0200
)
// Ethertypes holds the protocol numbers describing the payload of an ethernet
// frame. These types aren't necessarily supported by netstack, but can be used
// to catch all traffic of a type via packet endpoints.
var Ethertypes = []tcpip.NetworkProtocolNumber{
EthernetProtocolAll,
EthernetProtocolPUP,
}
// SourceAddress returns the "MAC source" field of the ethernet frame header.
func (b Ethernet) SourceAddress() tcpip.LinkAddress {
return tcpip.LinkAddress(b[srcMAC:][:EthernetAddressSize])

View File

@ -72,7 +72,7 @@ func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.Vector
// InjectLinkAddr injects an inbound packet with a remote link address.
func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, vv buffer.VectorisedView) {
e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil))
e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil), nil /* linkHeader */)
}
// Attach saves the stack network-layer dispatcher for use later when packets
@ -134,5 +134,22 @@ func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, hdr buffer.Prepen
return nil
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
//
// The flattened packet is stored in the Header field of the queued
// PacketInfo. The send on e.C never blocks: if the channel cannot accept the
// packet right now it is dropped and nil is still returned.
func (e *Endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
	// Proto and GSO are intentionally left at their zero values.
	info := PacketInfo{
		Header:  packet.ToView(),
		Payload: buffer.View{},
	}

	select {
	case e.C <- info:
	default:
		// Nobody can take the packet at the moment; drop it.
	}

	return nil
}
// Wait implements stack.LinkEndpoint.Wait.
func (*Endpoint) Wait() {}

View File

@ -430,8 +430,13 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
return rawfile.NonBlockingWrite3(e.fds[0], hdr.View(), payload.ToView(), nil)
}
// WriteRawPacket writes a raw packet directly to the file descriptor.
func (e *endpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error {
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket. The packet is
// flattened into a single view and written, without blocking, to the first
// file descriptor of the endpoint.
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
	flat := packet.ToView()
	return rawfile.NonBlockingWrite(e.fds[0], flat)
}
// InjectOutbound implements stack.InjectableEndpoint.InjectOutbound.
//
// packet is written as-is, without blocking, to the first file descriptor of
// the endpoint. dest is not used by this implementation.
func (e *endpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error {
return rawfile.NonBlockingWrite(e.fds[0], packet)
}
@ -468,9 +473,9 @@ func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
e.dispatcher = dispatcher
}
// Inject injects an inbound packet.
func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv)
// InjectInbound injects an inbound packet.
//
// The packet is handed to the attached dispatcher with empty remote and local
// link addresses and without a link header.
func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
}
// NewInjectable creates a new fd-based InjectableEndpoint.

View File

@ -169,9 +169,10 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
eth header.Ethernet
)
if d.e.hdrSize > 0 {
eth := header.Ethernet(pkt)
eth = header.Ethernet(pkt)
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@ -189,6 +190,6 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
}
pkt = pkt[d.e.hdrSize:]
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}))
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}), buffer.View(eth))
return true, nil
}

View File

@ -118,9 +118,10 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
eth header.Ethernet
)
if d.e.hdrSize > 0 {
eth := header.Ethernet(d.views[0])
eth = header.Ethernet(d.views[0][:header.EthernetMinimumSize])
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@ -141,7 +142,7 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
vv := buffer.NewVectorisedView(n, d.views[:used])
vv.TrimFront(d.e.hdrSize)
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {
@ -271,9 +272,10 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
eth header.Ethernet
)
if d.e.hdrSize > 0 {
eth := header.Ethernet(d.views[k][0])
eth = header.Ethernet(d.views[k][0])
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@ -293,7 +295,7 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
used := d.capViews(k, int(n), BufConfig)
vv := buffer.NewVectorisedView(int(n), d.views[k][:used])
vv.TrimFront(d.e.hdrSize)
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {

View File

@ -23,6 +23,7 @@ package loopback
import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@ -70,6 +71,9 @@ func (*endpoint) LinkAddress() tcpip.LinkAddress {
return ""
}
// Wait implements stack.LinkEndpoint.Wait.
func (*endpoint) Wait() {}
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
@ -81,10 +85,22 @@ func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependa
// Because we're immediately turning around and writing the packet back to the
// rx path, we intentionally don't preserve the remote and local link
// addresses from the stack.Route we're passed.
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv)
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
return nil
}
// Wait implements stack.LinkEndpoint.Wait.
func (*endpoint) Wait() {}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
//
// packet must begin with an ethernet header. The header is stripped and the
// remainder is looped straight back to the network-layer dispatcher, using
// the ethertype found in the header as the network protocol. Packets shorter
// than an ethernet header are rejected with tcpip.ErrBadAddress.
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
	// Reject the packet if it's shorter than an ethernet header.
	if packet.Size() < header.EthernetMinimumSize {
		return tcpip.ErrBadAddress
	}

	// The size check above covers the packet as a whole, but the header may
	// be spread over several views. Slicing the first view alone would then
	// go past its length, so flatten the packet into a single view first.
	if len(packet.First()) < header.EthernetMinimumSize {
		packet = packet.ToView().ToVectorisedView()
	}

	// There should be an ethernet header at the beginning of packet.
	linkHeader := header.Ethernet(packet.First()[:header.EthernetMinimumSize])
	packet.TrimFront(len(linkHeader))
	e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), packet, buffer.View(linkHeader))

	return nil
}

View File

@ -116,7 +116,7 @@ func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack
// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
// called by the link-layer endpoint being wrapped when a packet arrives, and
// logs the packet before forwarding to the actual dispatcher.
func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
logPacket("recv", protocol, vv.First(), nil)
}
@ -147,7 +147,7 @@ func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local
panic(err)
}
}
e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv)
e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader)
}
// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher
@ -218,8 +218,42 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
panic(err)
}
length -= len(hdrBuf)
if length > 0 {
for _, v := range payload.Views() {
logVectorisedView(payload, length, buf)
if _, err := e.file.Write(buf.Bytes()); err != nil {
panic(err)
}
}
return e.lower.WritePacket(r, gso, hdr, payload, protocol)
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
//
// Depending on the logging configuration the packet is either noted in the
// log (no header is decoded for raw packets) or captured to the pcap file,
// truncated to the configured maximum capture length. It is then forwarded
// to the wrapped endpoint. Failures while writing the capture panic.
func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
	if e.file == nil && atomic.LoadUint32(&LogPackets) == 1 {
		logPacket("send", 0, buffer.View("[raw packet, no header available]"), nil /* gso */)
	}

	if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 {
		// Capture at most maxPCAPLen bytes, but record the real size in
		// the pcap packet header.
		total := packet.Size()
		captured := total
		if limit := int(e.maxPCAPLen); captured > limit {
			captured = limit
		}

		out := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+captured))
		pcapHdr := newPCAPPacketHeader(uint32(captured), uint32(total))
		if err := binary.Write(out, binary.BigEndian, pcapHdr); err != nil {
			panic(err)
		}
		logVectorisedView(packet, captured, out)
		if _, err := e.file.Write(out.Bytes()); err != nil {
			panic(err)
		}
	}

	return e.lower.WriteRawPacket(packet)
}
func logVectorisedView(vv buffer.VectorisedView, length int, buf *bytes.Buffer) {
if length <= 0 {
return
}
for _, v := range vv.Views() {
if len(v) > length {
v = v[:length]
}
@ -229,15 +263,9 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
}
length -= n
if length == 0 {
break
return
}
}
}
if _, err := e.file.Write(buf.Bytes()); err != nil {
panic(err)
}
}
return e.lower.WritePacket(r, gso, hdr, payload, protocol)
}
// Wait implements stack.LinkEndpoint.Wait.

View File

@ -40,6 +40,9 @@ type NIC struct {
endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
addressRanges []tcpip.Subnet
mcastJoins map[NetworkEndpointID]int32
// packetEPs is protected by mu, but the contained PacketEndpoint
// values are not.
packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
stats NICStats
@ -78,7 +81,7 @@ const (
)
func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback bool) *NIC {
return &NIC{
nic := &NIC{
stack: stack,
id: id,
name: name,
@ -87,6 +90,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback
primary: make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint),
endpoints: make(map[NetworkEndpointID]*referencedNetworkEndpoint),
mcastJoins: make(map[NetworkEndpointID]int32),
packetEPs: make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint),
stats: NICStats{
Tx: DirectionStats{
Packets: &tcpip.StatCounter{},
@ -101,6 +105,16 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback
dad: make(map[tcpip.Address]dadState),
},
}
// Register supported packet endpoint protocols.
for _, netProto := range header.Ethertypes {
nic.packetEPs[netProto] = []PacketEndpoint{}
}
for _, netProto := range stack.networkProtocols {
nic.packetEPs[netProto.Number()] = []PacketEndpoint{}
}
return nic
}
// enable enables the NIC. enable will attach the link to its LinkEndpoint and
@ -631,7 +645,7 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address,
// Note that the ownership of the slice backing vv is retained by the caller.
// This rule applies only to the slice itself, not to the items of the slice;
// the ownership of the items is not retained by the caller.
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
n.stats.Rx.Packets.Increment()
n.stats.Rx.Bytes.IncrementBy(uint64(vv.Size()))
@ -641,6 +655,26 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
return
}
// If no local link layer address is provided, assume it was sent
// directly to this NIC.
if local == "" {
local = n.linkEP.LinkAddress()
}
// Are any packet sockets listening for this network protocol?
n.mu.RLock()
packetEPs := n.packetEPs[protocol]
// Check whether there are packet sockets listening for every protocol.
// If we received a packet with protocol EthernetProtocolAll, then the
// previous for loop will have handled it.
if protocol != header.EthernetProtocolAll {
packetEPs = append(packetEPs, n.packetEPs[header.EthernetProtocolAll]...)
}
n.mu.RUnlock()
for _, ep := range packetEPs {
ep.HandlePacket(n.id, local, protocol, vv, linkHeader)
}
if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
n.stack.stats.IP.PacketsReceived.Increment()
}
@ -700,7 +734,10 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
return
}
// If a packet socket handled the packet, don't treat it as invalid.
if len(packetEPs) == 0 {
n.stack.stats.IP.InvalidAddressesReceived.Increment()
}
}
// DeliverTransportPacket delivers the packets to the appropriate transport
@ -856,6 +893,36 @@ const (
temporary
)
// registerPacketEndpoint adds ep to the packet endpoints that receive packets
// of netProto on this NIC. It returns tcpip.ErrNotSupported when netProto has
// no entry in n.packetEPs.
func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	eps, ok := n.packetEPs[netProto]
	if !ok {
		return tcpip.ErrNotSupported
	}

	// Always store a freshly allocated slice whose length equals its
	// capacity. DeliverNetworkPacket takes a snapshot of this slice, appends
	// to the snapshot and iterates over it after releasing n.mu; appending in
	// place here (or there) into spare capacity would write into backing
	// memory that such a snapshot may still be reading.
	updated := make([]PacketEndpoint, len(eps), len(eps)+1)
	copy(updated, eps)
	n.packetEPs[netProto] = append(updated, ep)

	return nil
}
// unregisterPacketEndpoint removes the first occurrence of ep from the packet
// endpoints registered for netProto on this NIC. It is a no-op when netProto
// has no entry in n.packetEPs or ep is not registered.
func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
	n.mu.Lock()
	defer n.mu.Unlock()

	eps, ok := n.packetEPs[netProto]
	if !ok {
		return
	}

	for i, epOther := range eps {
		if epOther != ep {
			continue
		}
		// Build a new slice instead of compacting eps in place.
		// DeliverNetworkPacket iterates over a snapshot of the old slice
		// after releasing n.mu; shifting elements inside the shared backing
		// array would make it skip or repeat endpoints, and would leave a
		// stale reference to the last endpoint in the unused tail.
		remaining := make([]PacketEndpoint, 0, len(eps)-1)
		remaining = append(remaining, eps[:i]...)
		remaining = append(remaining, eps[i+1:]...)
		n.packetEPs[netProto] = remaining
		return
	}
}
type referencedNetworkEndpoint struct {
ep NetworkEndpoint
nic *NIC

View File

@ -71,8 +71,8 @@ type TransportEndpoint interface {
// RawTransportEndpoint is the interface that needs to be implemented by raw
// transport protocol endpoints. RawTransportEndpoints receive the entire
// packet - including the link, network, and transport headers - as delivered
// to netstack.
// packet - including the network and transport headers - as delivered to
// netstack.
type RawTransportEndpoint interface {
// HandlePacket is called by the stack when new packets arrive to
// this transport endpoint. The packet contains all data from the link
@ -80,6 +80,22 @@ type RawTransportEndpoint interface {
HandlePacket(r *Route, netHeader buffer.View, packet buffer.VectorisedView)
}
// PacketEndpoint is the interface that needs to be implemented by packet
// transport protocol endpoints. These endpoints receive link layer headers in
// addition to whatever they contain (usually network and transport layer
// headers and a payload).
type PacketEndpoint interface {
// HandlePacket is called by the stack when new packets arrive that
// match the endpoint.
//
// nicid identifies the NIC the packet arrived on, addr is the local
// link address the packet was delivered to, and netProto is the
// network protocol of the packet.
//
// Implementers should treat packet as immutable and should copy it
// before modification.
//
// linkHeader may have a length of 0, in which case the PacketEndpoint
// should construct its own ethernet header for applications.
HandlePacket(nicid tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, packet buffer.VectorisedView, linkHeader buffer.View)
}
// TransportProtocol is the interface that needs to be implemented by transport
// protocols (e.g., tcp, udp) that want to be part of the networking stack.
type TransportProtocol interface {
@ -242,9 +258,10 @@ type NetworkProtocol interface {
// packets to the appropriate network endpoint after it has been handled by
// the data link layer.
type NetworkDispatcher interface {
// DeliverNetworkPacket finds the appropriate network protocol
// endpoint and hands the packet over for further processing.
DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
// DeliverNetworkPacket finds the appropriate network protocol endpoint
// and hands the packet over for further processing. linkHeader may have
// length 0 when the caller does not have ethernet data.
DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View)
}
// LinkEndpointCapabilities is the type associated with the capabilities
@ -301,6 +318,10 @@ type LinkEndpoint interface {
// r.LocalLinkAddress if it is provided.
WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error
// WriteRawPacket writes a packet directly to the link. The packet
// should already have an ethernet header.
WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error
// Attach attaches the data link layer endpoint to the network-layer
// dispatcher of the stack.
Attach(dispatcher NetworkDispatcher)
@ -324,13 +345,14 @@ type LinkEndpoint interface {
type InjectableLinkEndpoint interface {
LinkEndpoint
// Inject injects an inbound packet.
Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
// InjectInbound injects an inbound packet.
InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView)
// WriteRawPacket writes a fully formed outbound packet directly to the link.
// InjectOutbound writes a fully formed outbound packet directly to the
// link.
//
// dest is used by endpoints with multiple raw destinations.
WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error
InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error
}
// A LinkAddressResolver is an extension to a NetworkProtocol that
@ -379,11 +401,16 @@ type LinkAddressCache interface {
RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker)
}
// UnassociatedEndpointFactory produces endpoints for writing packets not
// associated with a particular transport protocol. Such endpoints can be used
// to write arbitrary packets that include the IP header.
type UnassociatedEndpointFactory interface {
NewUnassociatedRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// RawFactory produces endpoints for writing various types of raw packets.
//
// A Stack without a RawFactory refuses to create raw and packet endpoints.
type RawFactory interface {
// NewUnassociatedEndpoint produces endpoints for writing packets not
// associated with a particular transport protocol. Such endpoints can
// be used to write arbitrary packets that include the network header.
NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// NewPacketEndpoint produces endpoints for reading and writing packets
// that include network and (when cooked is false) link layer headers.
// Cooked endpoints do not see link layer information.
NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
}
// GSOType is the type of GSO segments.

View File

@ -351,10 +351,9 @@ type Stack struct {
networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol
linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver
// unassociatedFactory creates unassociated endpoints. If nil, raw
// endpoints are disabled. It is set during Stack creation and is
// immutable.
unassociatedFactory UnassociatedEndpointFactory
// rawFactory creates raw endpoints. If nil, raw endpoints are
// disabled. It is set during Stack creation and is immutable.
rawFactory RawFactory
demux *transportDemuxer
@ -425,16 +424,16 @@ type Options struct {
// stack (false).
HandleLocal bool
// UnassociatedFactory produces unassociated endpoints raw endpoints.
// Raw endpoints are enabled only if this is non-nil.
UnassociatedFactory UnassociatedEndpointFactory
// NDPConfigs is the NDP configurations used by interfaces.
//
// By default, NDPConfigs will have a zero value for its
// DupAddrDetectTransmits field, implying that DAD will not be performed
// before assigning an address to a NIC.
NDPConfigs NDPConfigurations
// RawFactory produces raw endpoints. Raw endpoints are enabled only if
// this is non-nil.
RawFactory RawFactory
}
// TransportEndpointInfo holds useful information about a transport endpoint
@ -514,8 +513,8 @@ func New(opts Options) *Stack {
}
}
// Add the factory for unassociated endpoints, if present.
s.unassociatedFactory = opts.UnassociatedFactory
// Add the factory for raw endpoints, if present.
s.rawFactory = opts.RawFactory
// Create the global transport demuxer.
s.demux = newTransportDemuxer(s)
@ -650,12 +649,12 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp
// protocol. Raw endpoints receive all traffic for a given protocol regardless
// of address.
func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
if s.unassociatedFactory == nil {
if s.rawFactory == nil {
return nil, tcpip.ErrNotPermitted
}
if !associated {
return s.unassociatedFactory.NewUnassociatedRawEndpoint(s, network, transport, waiterQueue)
return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue)
}
t, ok := s.transportProtocols[transport]
@ -666,6 +665,16 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network
return t.proto.NewRawEndpoint(s, network, waiterQueue)
}
// NewPacketEndpoint creates a new packet endpoint listening for the given
// netProto. cooked selects whether the endpoint deals in packets without
// (true) or with (false) link layer headers.
//
// It fails with tcpip.ErrNotPermitted when the stack was created without a
// raw endpoint factory.
func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
	if factory := s.rawFactory; factory != nil {
		return factory.NewPacketEndpoint(s, cooked, netProto, waiterQueue)
	}
	return nil, tcpip.ErrNotPermitted
}
// createNIC creates a NIC with the provided id and link-layer endpoint, and
// optionally enable it.
func (s *Stack) createNIC(id tcpip.NICID, name string, ep LinkEndpoint, enabled, loopback bool) *tcpip.Error {
@ -1135,6 +1144,109 @@ func (s *Stack) Resume() {
}
}
// RegisterPacketEndpoint registers ep with the stack, causing it to receive
// all traffic of the specified netProto on the given NIC. If nicID is 0, it
// receives traffic from every NIC.
//
// Registration on all NICs is all-or-nothing: when any NIC rejects the
// endpoint, it is unregistered from every NIC again and the error is
// returned. An unknown non-zero nicID yields tcpip.ErrUnknownNICID.
func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if nicID != 0 {
		// Capture on a specific device.
		nic, found := s.nics[nicID]
		if !found {
			return tcpip.ErrUnknownNICID
		}
		return nic.registerPacketEndpoint(netProto, ep)
	}

	// No NIC was specified, so capture on all devices.
	for _, nic := range s.nics {
		err := nic.registerPacketEndpoint(netProto, ep)
		if err == nil {
			continue
		}
		// Roll back the registrations made so far.
		s.unregisterPacketEndpointLocked(0, netProto, ep)
		return err
	}

	return nil
}
// UnregisterPacketEndpoint unregisters ep for packets of the specified
// netProto from the specified NIC. If nicID is 0, ep is unregistered from all
// NICs.
//
// It acquires s.mu and delegates to unregisterPacketEndpointLocked.
func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
s.mu.Lock()
defer s.mu.Unlock()
s.unregisterPacketEndpointLocked(nicID, netProto, ep)
}
// unregisterPacketEndpointLocked removes ep as a receiver of netProto packets
// from the NIC identified by nicID, or from every NIC when nicID is 0. An
// unknown nicID is ignored.
//
// As the name says, callers are expected to hold s.mu.
func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
	if nicID != 0 {
		// Unregister from a single device, if it exists.
		if nic, found := s.nics[nicID]; found {
			nic.unregisterPacketEndpoint(netProto, ep)
		}
		return
	}

	// No NIC was specified, so unregister from all devices.
	for _, nic := range s.nics {
		nic.unregisterPacketEndpoint(netProto, ep)
	}
}
// WritePacket writes data directly to the specified NIC. It adds an ethernet
// header based on the arguments: the source is the link address of the NIC,
// the destination is dst and the ethertype is netProto.
//
// It returns tcpip.ErrUnknownDevice when no NIC with id nicid exists, and
// otherwise whatever the link endpoint reports for the write.
func (s *Stack) WritePacket(nicid tcpip.NICID, dst tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) *tcpip.Error {
	s.mu.Lock()
	nic, found := s.nics[nicid]
	s.mu.Unlock()
	if !found {
		return tcpip.ErrUnknownDevice
	}

	// Build our own fake ethernet header in front of the payload.
	eth := make(header.Ethernet, header.EthernetMinimumSize)
	eth.Encode(&header.EthernetFields{
		SrcAddr: nic.linkEP.LinkAddress(),
		DstAddr: dst,
		Type:    netProto,
	})

	frame := buffer.View(eth).ToVectorisedView()
	frame.Append(payload)

	return nic.linkEP.WriteRawPacket(frame)
}
// WriteRawPacket hands payload, unmodified and without any added headers, to
// the link endpoint of the NIC identified by nicid. It returns
// tcpip.ErrUnknownDevice if no NIC has that ID; otherwise it returns the
// result of the link endpoint's WriteRawPacket.
func (s *Stack) WriteRawPacket(nicid tcpip.NICID, payload buffer.VectorisedView) *tcpip.Error {
	// Only the NIC lookup is done under the stack lock.
	s.mu.Lock()
	nic, found := s.nics[nicid]
	s.mu.Unlock()
	if !found {
		return tcpip.ErrUnknownDevice
	}
	return nic.linkEP.WriteRawPacket(payload)
}
// NetworkProtocolInstance returns the protocol instance in the stack for the
// specified network protocol. This method is public for protocol implementers
// and tests to use.

View File

@ -465,7 +465,7 @@ func (d *transportDemuxer) findEndpointLocked(eps *transportEndpoints, vv buffer
func (d *transportDemuxer) registerRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error {
eps, ok := d.protocol[protocolIDs{netProto, transProto}]
if !ok {
return nil
return tcpip.ErrNotSupported
}
eps.mu.Lock()

View File

@ -255,7 +255,7 @@ type FullAddress struct {
// This may not be used by all endpoint types.
NIC NICID
// Addr is the network address.
// Addr is the network or link layer address.
Addr Address
// Port is the transport port.

View File

@ -0,0 +1,363 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package packet provides the implementation of packet sockets (see
// packet(7)). Packet sockets allow applications to:
//
// * manually write and inspect link, network, and transport headers
// * receive all traffic of a given network protocol, or all protocols
//
// Packet sockets are similar to raw sockets, but provide even more power to
// users, letting them effectively talk directly to the network device.
//
// Packet sockets skip the input and output iptables chains.
package packet
import (
"sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/iptables"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/waiter"
)
// packet is one received frame queued on an endpoint's receive list, along
// with the metadata that Read hands back to the caller.
//
// +stateify savable
type packet struct {
packetEntry
// data holds the actual packet data, including any headers and
// payload.
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
// views is pre-allocated space to back data. As long as the packet is
// made up of at most 8 buffer.Views, no extra allocation should be
// necessary to store packet data (presumably relies on
// VectorisedView.Clone reusing the slice it is given; confirm against
// the buffer package).
views [8]buffer.View `state:"nosave"`
// timestampNS is the time at which the packet was received, as
// reported by stack.NowNanoseconds().
timestampNS int64
// senderAddr identifies the receiving NIC and the link-layer address
// recorded for the packet: the ethernet source address when a header
// was supplied to HandlePacket, otherwise the local link address.
senderAddr tcpip.FullAddress
}
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
// to have goroutines make concurrent calls into the endpoint.
//
// Lock order:
// endpoint.mu
// endpoint.rcvMu
//
// +stateify savable
type endpoint struct {
stack.TransportEndpointInfo
// The following fields are initialized at creation time and are
// immutable.
// stack is the owning network stack. It is re-attached in afterLoad
// after a restore.
stack *stack.Stack `state:"manual"`
// netProto is the network protocol the endpoint is registered for;
// NewEndpoint also stores it in TransportEndpointInfo.NetProto.
netProto tcpip.NetworkProtocolNumber
// waiterQueue is notified when data becomes readable and when the
// endpoint is closed.
waiterQueue *waiter.Queue
// cooked selects how HandlePacket queues data: true queues the packet
// as delivered, false prepends an ethernet header.
cooked bool
// The following fields are used to manage the receive queue and are
// protected by rcvMu.
rcvMu sync.Mutex `state:"nosave"`
// rcvList holds queued packets; HandlePacket appends at the back and
// Read removes from the front.
rcvList packetList
// rcvBufSizeMax is the limit rcvBufSize is compared against before a
// new packet is accepted. It is set to 0 while saving.
rcvBufSizeMax int `state:".(int)"`
// rcvBufSize is the sum of data.Size() over the packets in rcvList.
rcvBufSize int
// rcvClosed is set by Close; once set, incoming packets are dropped.
rcvClosed bool
// The following fields are protected by mu.
mu sync.RWMutex `state:"nosave"`
// sndBufSize is set at creation.
// NOTE(review): no code in this file reads it.
sndBufSize int
// closed is set by Close and makes later Close calls no-ops.
closed bool
// NOTE(review): stats is updated from Read and HandlePacket without mu
// held; presumably its counters are internally synchronized - confirm.
stats tcpip.TransportEndpointStats `state:"nosave"`
}
// NewEndpoint creates a packet endpoint for netProto and registers it with s
// under NIC ID 0. cooked selects whether received packets are queued as
// delivered (true) or with an ethernet header prepended (false). The receive
// buffer limit and the send buffer size both start at 32 KiB. If registration
// fails, that error is returned together with a nil endpoint.
func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
	const defaultBufSize = 32 * 1024

	ep := &endpoint{
		TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto},
		stack:                 s,
		netProto:              netProto,
		waiterQueue:           waiterQueue,
		cooked:                cooked,
		rcvBufSizeMax:         defaultBufSize,
		sndBufSize:            defaultBufSize,
	}

	err := s.RegisterPacketEndpoint(0, netProto, ep)
	if err != nil {
		return nil, err
	}
	return ep, nil
}
// Close implements tcpip.Endpoint.Close. The first call unregisters the
// endpoint from the stack, marks it closed for receive, discards every queued
// packet and notifies waiters; later calls return immediately.
func (ep *endpoint) Close() {
	ep.mu.Lock()
	defer ep.mu.Unlock()

	// Closing twice is harmless.
	if ep.closed {
		return
	}

	// NIC ID 0 mirrors the registration made in NewEndpoint.
	ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)

	ep.rcvMu.Lock()
	defer ep.rcvMu.Unlock()

	// Refuse further packets and drop everything still queued.
	ep.rcvClosed = true
	ep.rcvBufSize = 0
	for !ep.rcvList.Empty() {
		oldest := ep.rcvList.Front()
		ep.rcvList.Remove(oldest)
	}

	ep.closed = true

	// Wake everyone up so blocked callers observe the closed endpoint.
	ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. It is a no-op
// for packet endpoints; copied is ignored.
func (ep *endpoint) ModerateRecvBuf(copied int) {}
// IPTables implements tcpip.Endpoint.IPTables. It returns the IPTables of the
// endpoint's stack; the error is always nil.
func (ep *endpoint) IPTables() (iptables.IPTables, error) {
return ep.stack.IPTables(), nil
}
// Read implements tcpip.Endpoint.Read. It dequeues the packet at the front of
// the receive list and returns its data together with a control message
// carrying the receive timestamp. If addr is non-nil it is filled in with the
// packet's sender address. When nothing is queued it returns
// tcpip.ErrWouldBlock, or tcpip.ErrClosedForReceive once the endpoint has been
// closed for receive.
func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
	ep.rcvMu.Lock()

	// Nothing queued: report either "closed" or "try again later".
	if ep.rcvList.Empty() {
		if !ep.rcvClosed {
			ep.rcvMu.Unlock()
			return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrWouldBlock
		}
		ep.stats.ReadErrors.ReadClosed.Increment()
		ep.rcvMu.Unlock()
		return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive
	}

	// Pop the oldest packet and give its bytes back to the buffer budget.
	pkt := ep.rcvList.Front()
	ep.rcvList.Remove(pkt)
	ep.rcvBufSize -= pkt.data.Size()
	ep.rcvMu.Unlock()

	if addr != nil {
		*addr = pkt.senderAddr
	}

	return pkt.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: pkt.timestampNS}, nil
}
// Write implements tcpip.Endpoint.Write. Writing is not implemented yet for
// packet endpoints: it sends nothing and always returns
// tcpip.ErrInvalidOptionValue.
func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
// TODO(b/129292371): Implement.
return 0, nil, tcpip.ErrInvalidOptionValue
}
// Peek implements tcpip.Endpoint.Peek. It copies nothing: it always reports 0
// bytes, empty control messages and a nil error.
func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
return 0, tcpip.ControlMessages{}, nil
}
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
// disconnected, and this function always returns tcpip.ErrNotSupported.
func (*endpoint) Disconnect() *tcpip.Error {
return tcpip.ErrNotSupported
}
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
// connected, and this function always returns tcpip.ErrNotSupported.
func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
return tcpip.ErrNotSupported
}
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
// with Shutdown, and this function always returns tcpip.ErrNotSupported,
// whatever flags is.
func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
return tcpip.ErrNotSupported
}
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
// Listen, and this function always returns tcpip.ErrNotSupported, whatever
// backlog is.
func (ep *endpoint) Listen(backlog int) *tcpip.Error {
return tcpip.ErrNotSupported
}
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
// Accept, and this function always returns a nil endpoint, a nil queue and
// tcpip.ErrNotSupported.
func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
return nil, nil, tcpip.ErrNotSupported
}
// Bind implements tcpip.Endpoint.Bind. Binding is not supported yet: addr is
// ignored and tcpip.ErrNotSupported is always returned.
func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
// TODO(gvisor.dev/issue/173): Add Bind support.
// "By default, all packets of the specified protocol type are passed
// to a packet socket. To get packets only from a specific interface
// use bind(2) specifying an address in a struct sockaddr_ll to bind
// the packet socket to an interface. Fields used for binding are
// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
// - packet(7).
return tcpip.ErrNotSupported
}
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. It always returns
// an empty address and tcpip.ErrNotSupported.
func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
return tcpip.FullAddress{}, tcpip.ErrNotSupported
}
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. It always
// returns an empty address and tcpip.ErrNotConnected.
func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
// Even a connected socket doesn't return a remote address.
return tcpip.FullAddress{}, tcpip.ErrNotConnected
}
// Readiness implements tcpip.Endpoint.Readiness. It returns the events in mask
// that are currently signalled: EventOut is always reported, and EventIn is
// reported when a packet is queued or the endpoint is closed for receive.
func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
	// Writability is unconditional.
	ready := mask & waiter.EventOut

	// Readability is only computed when the caller asked about it, so the
	// receive lock is not taken needlessly.
	if mask&waiter.EventIn == 0 {
		return ready
	}

	ep.rcvMu.Lock()
	defer ep.rcvMu.Unlock()
	if ep.rcvClosed || !ep.rcvList.Empty() {
		ready |= waiter.EventIn
	}
	return ready
}
// SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
// used with SetSockOpt, and this function always returns
// tcpip.ErrNotSupported without inspecting opt.
func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
return tcpip.ErrNotSupported
}
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. No integer options
// are handled: it always returns tcpip.ErrUnknownProtocolOption.
func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
return tcpip.ErrUnknownProtocolOption
}
// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. No integer options
// are handled: it always returns 0 and tcpip.ErrNotSupported.
func (ep *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) {
return 0, tcpip.ErrNotSupported
}
// GetSockOpt implements tcpip.Endpoint.GetSockOpt. No options are handled: it
// always returns tcpip.ErrNotSupported without inspecting opt.
func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
return tcpip.ErrNotSupported
}
// HandlePacket implements stack.PacketEndpoint.HandlePacket. It queues an
// incoming packet for a later Read, or drops it (incrementing the stack and
// endpoint drop counters) when the endpoint is closed for receive or its
// receive buffer is full.
//
// ethHeader may be empty. In that case the sender address is recorded as
// localAddr, and a non-cooked endpoint prepends a synthesized ethernet header
// whose source address is all zeroes and whose destination is localAddr.
// Cooked endpoints queue vv without any ethernet header.
//
// Waiters are notified with EventIn only when the receive buffer was empty
// before this packet was queued.
func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, ethHeader buffer.View) {
	ep.rcvMu.Lock()

	// Drop the packet if the endpoint no longer receives.
	if ep.rcvClosed {
		ep.rcvMu.Unlock()
		ep.stack.Stats().DroppedPackets.Increment()
		ep.stats.ReceiveErrors.ClosedReceiver.Increment()
		return
	}

	// Drop the packet if our buffer is currently full.
	if ep.rcvBufSize >= ep.rcvBufSizeMax {
		ep.rcvMu.Unlock()
		ep.stack.Stats().DroppedPackets.Increment()
		ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
		return
	}

	// Remember whether anything was buffered before this packet.
	notify := ep.rcvBufSize == 0

	pkt := new(packet)

	// TODO(b/129292371): Return network protocol.
	// Without an ethernet header the best available guess for the sender
	// is the local link address; with one, use its source address.
	sender := tcpip.Address(localAddr)
	if len(ethHeader) > 0 {
		sender = tcpip.Address(header.Ethernet(ethHeader).SourceAddress())
	}
	pkt.senderAddr = tcpip.FullAddress{NIC: nicid, Addr: sender}

	if ep.cooked {
		// Cooked packets can simply be queued.
		pkt.data = vv.Clone(pkt.views[:])
	} else {
		// Raw packets need their ethernet headers prepended before
		// queueing.
		if len(ethHeader) == 0 {
			// No real header was provided, so fake one.
			synth := make(header.Ethernet, header.EthernetMinimumSize)
			synth.Encode(&header.EthernetFields{
				SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
				DstAddr: localAddr,
				Type:    netProto,
			})
			ethHeader = buffer.View(synth)
		}
		framed := ethHeader.ToVectorisedView()
		framed.Append(vv)
		pkt.data = framed.Clone(pkt.views[:])
	}

	// Timestamp, enqueue and account for the packet.
	pkt.timestampNS = ep.stack.NowNanoseconds()
	ep.rcvList.PushBack(pkt)
	ep.rcvBufSize += pkt.data.Size()
	ep.rcvMu.Unlock()

	ep.stats.PacketsReceived.Increment()

	// Notify waiters that there's data to be read.
	if notify {
		ep.waiterQueue.Notify(waiter.EventIn)
	}
}
// State implements socket.Socket.State. It always returns 0.
func (ep *endpoint) State() uint32 {
return 0
}
// Info returns a pointer to a snapshot of the endpoint's
// TransportEndpointInfo. The snapshot is copied under a read lock on ep.mu, so
// the caller may use it without further locking.
func (ep *endpoint) Info() tcpip.EndpointInfo {
	ep.mu.RLock()
	defer ep.mu.RUnlock()

	snapshot := ep.TransportEndpointInfo
	return &snapshot
}
// Stats returns a pointer to the endpoint stats. The pointer refers to the
// endpoint's live stats field, not a copy.
func (ep *endpoint) Stats() tcpip.EndpointStats {
return &ep.stats
}

View File

@ -0,0 +1,72 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package packet
import (
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
// saveData saves the packet.data field. It returns a clone of p.data made
// with a nil buffer argument rather than p.data itself.
func (p *packet) saveData() buffer.VectorisedView {
// We cannot save p.data directly as p.data.views may alias to p.views,
// which is not allowed by state framework (in-struct pointer).
return p.data.Clone(nil)
}
// loadData loads the packet.data field by storing data in p.data as is.
func (p *packet) loadData(data buffer.VectorisedView) {
// NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
// here because data.views is not guaranteed to be loaded by now. Plus,
// data.views will be allocated anyway so there really is little point
// of utilizing p.views for data.views.
p.data = data
}
// beforeSave is invoked by stateify. It acquires ep.rcvMu and returns with it
// still held; saveRcvBufSizeMax releases it.
func (ep *endpoint) beforeSave() {
// Stop incoming packets from being handled (and from mutating endpoint
// state). The lock will be released after saveRcvBufSizeMax(), which
// will have saved ep.rcvBufSizeMax and set it to 0 to continue blocking
// incoming packets.
ep.rcvMu.Lock()
}
// saveRcvBufSizeMax is invoked by stateify. It returns the current receive
// buffer limit for saving, zeroes the live limit, and releases ep.rcvMu, which
// beforeSave acquired.
func (ep *endpoint) saveRcvBufSizeMax() int {
	saved := ep.rcvBufSizeMax

	// With a zero limit HandlePacket keeps dropping packets even though
	// the lock is about to be released.
	ep.rcvBufSizeMax = 0

	// Pairs with the Lock in beforeSave so that the regular endpoint
	// closing logic can proceed after save.
	ep.rcvMu.Unlock()

	return saved
}
// loadRcvBufSizeMax is invoked by stateify. It restores the receive buffer
// limit to max, the value returned by saveRcvBufSizeMax at save time.
func (ep *endpoint) loadRcvBufSizeMax(max int) {
ep.rcvBufSizeMax = max
}
// afterLoad is invoked by stateify. It points the endpoint at
// stack.StackFromEnv and registers it again for ep.netProto under NIC ID 0,
// panicking if the registration fails.
func (ep *endpoint) afterLoad() {
	// StackFromEnv is a stack used specifically for save/restore.
	ep.stack = stack.StackFromEnv

	// TODO(gvisor.dev/issue/173): Once bind is supported, choose the right NIC.
	err := ep.stack.RegisterPacketEndpoint(0, ep.netProto, ep)
	if err != nil {
		panic(*err)
	}
}

View File

@ -1,4 +1,4 @@
package raw
package packet
// ElementMapper provides an identity mapping by default.
//

View File

@ -0,0 +1,88 @@
// automatically generated by stateify.
package packet
import (
"gvisor.dev/gvisor/pkg/state"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
)
func (x *packet) beforeSave() {}
func (x *packet) save(m state.Map) {
x.beforeSave()
var data buffer.VectorisedView = x.saveData()
m.SaveValue("data", data)
m.Save("packetEntry", &x.packetEntry)
m.Save("timestampNS", &x.timestampNS)
m.Save("senderAddr", &x.senderAddr)
}
func (x *packet) afterLoad() {}
func (x *packet) load(m state.Map) {
m.Load("packetEntry", &x.packetEntry)
m.Load("timestampNS", &x.timestampNS)
m.Load("senderAddr", &x.senderAddr)
m.LoadValue("data", new(buffer.VectorisedView), func(y interface{}) { x.loadData(y.(buffer.VectorisedView)) })
}
func (x *endpoint) save(m state.Map) {
x.beforeSave()
var rcvBufSizeMax int = x.saveRcvBufSizeMax()
m.SaveValue("rcvBufSizeMax", rcvBufSizeMax)
m.Save("TransportEndpointInfo", &x.TransportEndpointInfo)
m.Save("netProto", &x.netProto)
m.Save("waiterQueue", &x.waiterQueue)
m.Save("cooked", &x.cooked)
m.Save("rcvList", &x.rcvList)
m.Save("rcvBufSize", &x.rcvBufSize)
m.Save("rcvClosed", &x.rcvClosed)
m.Save("sndBufSize", &x.sndBufSize)
m.Save("closed", &x.closed)
}
func (x *endpoint) load(m state.Map) {
m.Load("TransportEndpointInfo", &x.TransportEndpointInfo)
m.Load("netProto", &x.netProto)
m.Load("waiterQueue", &x.waiterQueue)
m.Load("cooked", &x.cooked)
m.Load("rcvList", &x.rcvList)
m.Load("rcvBufSize", &x.rcvBufSize)
m.Load("rcvClosed", &x.rcvClosed)
m.Load("sndBufSize", &x.sndBufSize)
m.Load("closed", &x.closed)
m.LoadValue("rcvBufSizeMax", new(int), func(y interface{}) { x.loadRcvBufSizeMax(y.(int)) })
m.AfterLoad(x.afterLoad)
}
func (x *packetList) beforeSave() {}
func (x *packetList) save(m state.Map) {
x.beforeSave()
m.Save("head", &x.head)
m.Save("tail", &x.tail)
}
func (x *packetList) afterLoad() {}
func (x *packetList) load(m state.Map) {
m.Load("head", &x.head)
m.Load("tail", &x.tail)
}
func (x *packetEntry) beforeSave() {}
func (x *packetEntry) save(m state.Map) {
x.beforeSave()
m.Save("next", &x.next)
m.Save("prev", &x.prev)
}
func (x *packetEntry) afterLoad() {}
func (x *packetEntry) load(m state.Map) {
m.Load("next", &x.next)
m.Load("prev", &x.prev)
}
func init() {
state.Register("packet.packet", (*packet)(nil), state.Fns{Save: (*packet).save, Load: (*packet).load})
state.Register("packet.endpoint", (*endpoint)(nil), state.Fns{Save: (*endpoint).save, Load: (*endpoint).load})
state.Register("packet.packetList", (*packetList)(nil), state.Fns{Save: (*packetList).save, Load: (*packetList).load})
state.Register("packet.packetEntry", (*packetEntry)(nil), state.Fns{Save: (*packetEntry).save, Load: (*packetEntry).load})
}

View File

@ -17,8 +17,7 @@
//
// * manually write and inspect transport layer headers and payloads
// * receive all traffic of a given transport protocol (e.g. ICMP or UDP)
// * optionally write and inspect network layer and link layer headers for
// packets
// * optionally write and inspect network layer headers of packets
//
// Raw sockets don't have any notion of ports, and incoming packets are
// demultiplexed solely by protocol number. Thus, a raw UDP endpoint will
@ -38,8 +37,8 @@ import (
)
// +stateify savable
type packet struct {
packetEntry
type rawPacket struct {
rawPacketEntry
// data holds the actual packet data, including any headers and
// payload.
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
@ -72,7 +71,7 @@ type endpoint struct {
// The following fields are used to manage the receive queue and are
// protected by rcvMu.
rcvMu sync.Mutex `state:"nosave"`
rcvList packetList
rcvList rawPacketList
rcvBufSizeMax int `state:".(int)"`
rcvBufSize int
rcvClosed bool
@ -90,7 +89,6 @@ type endpoint struct {
}
// NewEndpoint returns a raw endpoint for the given protocols.
// TODO(b/129292371): IP_HDRINCL and AF_PACKET.
func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */)
}
@ -187,17 +185,17 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
return buffer.View{}, tcpip.ControlMessages{}, err
}
packet := e.rcvList.Front()
e.rcvList.Remove(packet)
e.rcvBufSize -= packet.data.Size()
pkt := e.rcvList.Front()
e.rcvList.Remove(pkt)
e.rcvBufSize -= pkt.data.Size()
e.rcvMu.Unlock()
if addr != nil {
*addr = packet.senderAddr
*addr = pkt.senderAddr
}
return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil
return pkt.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: pkt.timestampNS}, nil
}
// Write implements tcpip.Endpoint.Write.
@ -602,7 +600,7 @@ func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv bu
wasEmpty := e.rcvBufSize == 0
// Push new packet into receive list and increment the buffer size.
packet := &packet{
pkt := &rawPacket{
senderAddr: tcpip.FullAddress{
NIC: route.NICID(),
Addr: route.RemoteAddress,
@ -611,11 +609,11 @@ func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv bu
combinedVV := netHeader.ToVectorisedView()
combinedVV.Append(vv)
packet.data = combinedVV.Clone(packet.views[:])
packet.timestampNS = e.stack.NowNanoseconds()
pkt.data = combinedVV.Clone(pkt.views[:])
pkt.timestampNS = e.stack.NowNanoseconds()
e.rcvList.PushBack(packet)
e.rcvBufSize += packet.data.Size()
e.rcvList.PushBack(pkt)
e.rcvBufSize += pkt.data.Size()
e.rcvMu.Unlock()
e.stats.PacketsReceived.Increment()

View File

@ -20,15 +20,15 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
// saveData saves packet.data field.
func (p *packet) saveData() buffer.VectorisedView {
// saveData saves rawPacket.data field.
func (p *rawPacket) saveData() buffer.VectorisedView {
// We cannot save p.data directly as p.data.views may alias to p.views,
// which is not allowed by state framework (in-struct pointer).
return p.data.Clone(nil)
}
// loadData loads packet.data field.
func (p *packet) loadData(data buffer.VectorisedView) {
// loadData loads rawPacket.data field.
func (p *rawPacket) loadData(data buffer.VectorisedView) {
// NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
// here because data.views is not guaranteed to be loaded by now. Plus,
// data.views will be allocated anyway so there really is little point
@ -86,7 +86,9 @@ func (ep *endpoint) Resume(s *stack.Stack) {
}
}
if ep.associated {
if err := ep.stack.RegisterRawTransportEndpoint(ep.RegisterNICID, ep.NetProto, ep.TransProto, ep); err != nil {
panic(err)
}
}
}

View File

@ -17,13 +17,19 @@ package raw
import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/packet"
"gvisor.dev/gvisor/pkg/waiter"
)
// EndpointFactory implements stack.UnassociatedEndpointFactory.
// EndpointFactory implements stack.RawFactory.
type EndpointFactory struct{}
// NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory.
func (EndpointFactory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
// NewUnassociatedEndpoint implements stack.RawFactory.NewUnassociatedEndpoint.
func (EndpointFactory) NewUnassociatedEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */)
}
// NewPacketEndpoint implements stack.RawFactory.NewPacketEndpoint.
func (EndpointFactory) NewPacketEndpoint(stack *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
return packet.NewEndpoint(stack, cooked, netProto, waiterQueue)
}

View File

@ -0,0 +1,173 @@
package raw
// rawPacketElementMapper provides an identity mapping by default: its
// linkerFor method returns the element it is given.
//
// This can be replaced to provide a struct that maps elements to linker
// objects, if they are not the same. An ElementMapper is not typically
// required if: Linker is left as is, Element is left as is, or Linker and
// Element are the same type.
type rawPacketElementMapper struct{}
// linkerFor maps an Element to a Linker. Here both are *rawPacket, so elem is
// returned unchanged.
//
// This default implementation should be inlined.
//
//go:nosplit
func (rawPacketElementMapper) linkerFor(elem *rawPacket) *rawPacket { return elem }
// rawPacketList is an intrusive list of *rawPacket. Entries can be added to or
// removed from the list in O(1) time and with no additional memory
// allocations.
//
// The zero value for rawPacketList is an empty list ready to use.
//
// To iterate over a list (where l is a rawPacketList):
// for e := l.Front(); e != nil; e = e.Next() {
// // do something with e.
// }
//
// +stateify savable
type rawPacketList struct {
head *rawPacket
tail *rawPacket
}
// Reset resets list l to the empty state by clearing its head and tail. The
// elements that were in the list are not modified.
func (l *rawPacketList) Reset() {
l.head = nil
l.tail = nil
}
// Empty returns true iff the list is empty, i.e. iff it has no head element.
func (l *rawPacketList) Empty() bool {
return l.head == nil
}
// Front returns the first element of list l, or nil if l is empty. The element
// stays in the list.
func (l *rawPacketList) Front() *rawPacket {
return l.head
}
// Back returns the last element of list l, or nil if l is empty. The element
// stays in the list.
func (l *rawPacketList) Back() *rawPacket {
return l.tail
}
// PushFront inserts the element e at the front of list l. If l was empty, e
// also becomes its last element.
func (l *rawPacketList) PushFront(e *rawPacket) {
	var mapper rawPacketElementMapper
	oldHead := l.head

	// e points forward at the previous head and has nothing before it.
	mapper.linkerFor(e).SetNext(oldHead)
	mapper.linkerFor(e).SetPrev(nil)

	if oldHead == nil {
		// The list was empty, so e is the tail as well.
		l.tail = e
	} else {
		mapper.linkerFor(oldHead).SetPrev(e)
	}
	l.head = e
}
// PushBack inserts the element e at the back of list l. If l was empty, e
// also becomes its first element.
func (l *rawPacketList) PushBack(e *rawPacket) {
	var mapper rawPacketElementMapper
	oldTail := l.tail

	// e has nothing after it and points back at the previous tail.
	mapper.linkerFor(e).SetNext(nil)
	mapper.linkerFor(e).SetPrev(oldTail)

	if oldTail == nil {
		// The list was empty, so e is the head as well.
		l.head = e
	} else {
		mapper.linkerFor(oldTail).SetNext(e)
	}
	l.tail = e
}
// PushBackList moves every element of m to the end of l, preserving order,
// and leaves m empty.
func (l *rawPacketList) PushBackList(m *rawPacketList) {
	switch {
	case l.head == nil:
		// l is empty: adopt m's elements wholesale.
		l.head = m.head
		l.tail = m.tail
	case m.head != nil:
		// Both lists have elements: stitch m's head onto l's tail.
		var mapper rawPacketElementMapper
		mapper.linkerFor(l.tail).SetNext(m.head)
		mapper.linkerFor(m.head).SetPrev(l.tail)
		l.tail = m.tail
	}

	m.head = nil
	m.tail = nil
}
// InsertAfter inserts e into l immediately after b. If b was the last
// element, e becomes the new tail.
func (l *rawPacketList) InsertAfter(b, e *rawPacket) {
	var mapper rawPacketElementMapper
	succ := mapper.linkerFor(b).Next()

	// Splice e between b and b's former successor.
	mapper.linkerFor(e).SetNext(succ)
	mapper.linkerFor(e).SetPrev(b)
	mapper.linkerFor(b).SetNext(e)

	if succ == nil {
		l.tail = e
		return
	}
	mapper.linkerFor(succ).SetPrev(e)
}
// InsertBefore inserts e into l immediately before a. If a was the first
// element, e becomes the new head.
func (l *rawPacketList) InsertBefore(a, e *rawPacket) {
	var mapper rawPacketElementMapper
	pred := mapper.linkerFor(a).Prev()

	// Splice e between a's former predecessor and a.
	mapper.linkerFor(e).SetNext(a)
	mapper.linkerFor(e).SetPrev(pred)
	mapper.linkerFor(a).SetPrev(e)

	if pred == nil {
		l.head = e
		return
	}
	mapper.linkerFor(pred).SetNext(e)
}
// Remove unlinks e from l by joining e's neighbours to each other and moving
// the list's head and tail as needed. e's own next and prev links are left as
// they were.
func (l *rawPacketList) Remove(e *rawPacket) {
	var mapper rawPacketElementMapper
	before := mapper.linkerFor(e).Prev()
	after := mapper.linkerFor(e).Next()

	// Fix up whatever pointed forward at e.
	if before == nil {
		l.head = after
	} else {
		mapper.linkerFor(before).SetNext(after)
	}

	// Fix up whatever pointed backward at e.
	if after == nil {
		l.tail = before
	} else {
		mapper.linkerFor(after).SetPrev(before)
	}
}
// rawPacketEntry is a default implementation of Linker. Users can add
// anonymous fields of this type to their structs to make them automatically
// implement the methods needed by rawPacketList.
//
// +stateify savable
type rawPacketEntry struct {
next *rawPacket
prev *rawPacket
}
// Next returns the entry that follows e in the list, or nil if none has been
// set.
func (e *rawPacketEntry) Next() *rawPacket {
return e.next
}
// Prev returns the entry that precedes e in the list, or nil if none has been
// set.
func (e *rawPacketEntry) Prev() *rawPacket {
return e.prev
}
// SetNext assigns elem as the entry that follows e in the list. It only
// updates e; elem itself is not modified.
func (e *rawPacketEntry) SetNext(elem *rawPacket) {
e.next = elem
}
// SetPrev assigns elem as the entry that precedes e in the list. It only
// updates e; elem itself is not modified.
func (e *rawPacketEntry) SetPrev(elem *rawPacket) {
e.prev = elem
}

View File

@ -7,19 +7,19 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/buffer"
)
func (x *packet) beforeSave() {}
func (x *packet) save(m state.Map) {
func (x *rawPacket) beforeSave() {}
func (x *rawPacket) save(m state.Map) {
x.beforeSave()
var data buffer.VectorisedView = x.saveData()
m.SaveValue("data", data)
m.Save("packetEntry", &x.packetEntry)
m.Save("rawPacketEntry", &x.rawPacketEntry)
m.Save("timestampNS", &x.timestampNS)
m.Save("senderAddr", &x.senderAddr)
}
func (x *packet) afterLoad() {}
func (x *packet) load(m state.Map) {
m.Load("packetEntry", &x.packetEntry)
func (x *rawPacket) afterLoad() {}
func (x *rawPacket) load(m state.Map) {
m.Load("rawPacketEntry", &x.rawPacketEntry)
m.Load("timestampNS", &x.timestampNS)
m.Load("senderAddr", &x.senderAddr)
m.LoadValue("data", new(buffer.VectorisedView), func(y interface{}) { x.loadData(y.(buffer.VectorisedView)) })
@ -56,35 +56,35 @@ func (x *endpoint) load(m state.Map) {
m.AfterLoad(x.afterLoad)
}
func (x *packetList) beforeSave() {}
func (x *packetList) save(m state.Map) {
func (x *rawPacketList) beforeSave() {}
func (x *rawPacketList) save(m state.Map) {
x.beforeSave()
m.Save("head", &x.head)
m.Save("tail", &x.tail)
}
func (x *packetList) afterLoad() {}
func (x *packetList) load(m state.Map) {
func (x *rawPacketList) afterLoad() {}
func (x *rawPacketList) load(m state.Map) {
m.Load("head", &x.head)
m.Load("tail", &x.tail)
}
func (x *packetEntry) beforeSave() {}
func (x *packetEntry) save(m state.Map) {
func (x *rawPacketEntry) beforeSave() {}
func (x *rawPacketEntry) save(m state.Map) {
x.beforeSave()
m.Save("next", &x.next)
m.Save("prev", &x.prev)
}
func (x *packetEntry) afterLoad() {}
func (x *packetEntry) load(m state.Map) {
func (x *rawPacketEntry) afterLoad() {}
func (x *rawPacketEntry) load(m state.Map) {
m.Load("next", &x.next)
m.Load("prev", &x.prev)
}
func init() {
state.Register("raw.packet", (*packet)(nil), state.Fns{Save: (*packet).save, Load: (*packet).load})
state.Register("raw.rawPacket", (*rawPacket)(nil), state.Fns{Save: (*rawPacket).save, Load: (*rawPacket).load})
state.Register("raw.endpoint", (*endpoint)(nil), state.Fns{Save: (*endpoint).save, Load: (*endpoint).load})
state.Register("raw.packetList", (*packetList)(nil), state.Fns{Save: (*packetList).save, Load: (*packetList).load})
state.Register("raw.packetEntry", (*packetEntry)(nil), state.Fns{Save: (*packetEntry).save, Load: (*packetEntry).load})
state.Register("raw.rawPacketList", (*rawPacketList)(nil), state.Fns{Save: (*rawPacketList).save, Load: (*rawPacketList).load})
state.Register("raw.rawPacketEntry", (*rawPacketEntry)(nil), state.Fns{Save: (*rawPacketEntry).save, Load: (*rawPacketEntry).load})
}

View File

@ -121,8 +121,15 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
payloadLen = available
}
payload := buffer.NewVectorisedView(len(netHeader), []buffer.View{netHeader})
payload.Append(vv)
// The buffers used by vv and netHeader may be used elsewhere
// in the system. For example, a raw or packet socket may use
// what UDP considers an unreachable destination. Thus we deep
// copy vv and netHeader to prevent multiple ownership and SR
// errors.
newNetHeader := make(buffer.View, len(netHeader))
copy(newNetHeader, netHeader)
payload := buffer.NewVectorisedView(len(newNetHeader), []buffer.View{newNetHeader})
payload.Append(vv.ToView().ToVectorisedView())
payload.CapLength(payloadLen)
hdr := buffer.NewPrependable(headerLen)

View File

@ -922,7 +922,7 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
HandleLocal: true,
// Enable raw sockets for users with sufficient
// privileges.
UnassociatedFactory: raw.EndpointFactory{},
RawFactory: raw.EndpointFactory{},
})}
// Enable SACK Recovery.