383 lines
9.5 KiB
Go
383 lines
9.5 KiB
Go
// Copyright 2020 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tun
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
"gvisor.dev/gvisor/pkg/context"
|
|
"gvisor.dev/gvisor/pkg/sync"
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
"gvisor.dev/gvisor/pkg/tcpip/buffer"
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
|
"gvisor.dev/gvisor/pkg/tcpip/link/channel"
|
|
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
|
"gvisor.dev/gvisor/pkg/waiter"
|
|
)
|
|
|
|
// Defaults applied to every NIC created through the tun device.
const (
	// defaultDevMtu is the MTU given to a newly created NIC. The value
	// follows drivers/net/tun.c:tun_net_init().
	defaultDevMtu = 1500

	// defaultDevOutQueueLen bounds the queue of outbound packets waiting to be
	// read from the fd side. Packets are dropped once the queue overflows.
	// This limit is specific to the gVisor implementation.
	defaultDevOutQueueLen = 1024
)

// zeroMAC is an all-zero 6-byte hardware address. Write uses it as the remote
// link address for frames that carry no Ethernet header.
var zeroMAC [6]byte
|
|
|
|
// Device is an opened /dev/net/tun device.
|
|
//
|
|
// +stateify savable
|
|
type Device struct {
|
|
waiter.Queue
|
|
|
|
mu sync.RWMutex `state:"nosave"`
|
|
endpoint *tunEndpoint
|
|
notifyHandle *channel.NotificationHandle
|
|
flags uint16
|
|
}
|
|
|
|
// beforeSave is invoked by stateify.
|
|
func (d *Device) beforeSave() {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
// TODO(b/110961832): Restore the device to stack. At this moment, the stack
|
|
// is not savable.
|
|
if d.endpoint != nil {
|
|
panic("/dev/net/tun does not support save/restore when a device is associated with it.")
|
|
}
|
|
}
|
|
|
|
// Release implements fs.FileOperations.Release.
|
|
func (d *Device) Release(ctx context.Context) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
// Decrease refcount if there is an endpoint associated with this file.
|
|
if d.endpoint != nil {
|
|
d.endpoint.RemoveNotify(d.notifyHandle)
|
|
d.endpoint.DecRef(ctx)
|
|
d.endpoint = nil
|
|
}
|
|
}
|
|
|
|
// SetIff services TUNSETIFF ioctl(2) request.
|
|
func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
if d.endpoint != nil {
|
|
return syserror.EINVAL
|
|
}
|
|
|
|
// Input validations.
|
|
isTun := flags&linux.IFF_TUN != 0
|
|
isTap := flags&linux.IFF_TAP != 0
|
|
supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI)
|
|
if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 {
|
|
return syserror.EINVAL
|
|
}
|
|
|
|
prefix := "tun"
|
|
if isTap {
|
|
prefix = "tap"
|
|
}
|
|
|
|
linkCaps := stack.CapabilityNone
|
|
if isTap {
|
|
linkCaps |= stack.CapabilityResolutionRequired
|
|
}
|
|
|
|
endpoint, err := attachOrCreateNIC(s, name, prefix, linkCaps)
|
|
if err != nil {
|
|
return syserror.EINVAL
|
|
}
|
|
|
|
d.endpoint = endpoint
|
|
d.notifyHandle = d.endpoint.AddNotify(d)
|
|
d.flags = flags
|
|
return nil
|
|
}
|
|
|
|
func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkEndpointCapabilities) (*tunEndpoint, error) {
|
|
for {
|
|
// 1. Try to attach to an existing NIC.
|
|
if name != "" {
|
|
if nic, found := s.GetNICByName(name); found {
|
|
endpoint, ok := nic.LinkEndpoint().(*tunEndpoint)
|
|
if !ok {
|
|
// Not a NIC created by tun device.
|
|
return nil, syserror.EOPNOTSUPP
|
|
}
|
|
if !endpoint.TryIncRef() {
|
|
// Race detected: NIC got deleted in between.
|
|
continue
|
|
}
|
|
return endpoint, nil
|
|
}
|
|
}
|
|
|
|
// 2. Creating a new NIC.
|
|
id := tcpip.NICID(s.UniqueID())
|
|
// TODO(gvisor.dev/1486): enable leak check for tunEndpoint.
|
|
endpoint := &tunEndpoint{
|
|
Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""),
|
|
stack: s,
|
|
nicID: id,
|
|
name: name,
|
|
isTap: prefix == "tap",
|
|
}
|
|
endpoint.Endpoint.LinkEPCapabilities = linkCaps
|
|
if endpoint.name == "" {
|
|
endpoint.name = fmt.Sprintf("%s%d", prefix, id)
|
|
}
|
|
err := s.CreateNICWithOptions(endpoint.nicID, endpoint, stack.NICOptions{
|
|
Name: endpoint.name,
|
|
})
|
|
switch err {
|
|
case nil:
|
|
return endpoint, nil
|
|
case tcpip.ErrDuplicateNICID:
|
|
// Race detected: A NIC has been created in between.
|
|
continue
|
|
default:
|
|
return nil, syserror.EINVAL
|
|
}
|
|
}
|
|
}
|
|
|
|
// Write inject one inbound packet to the network interface.
|
|
func (d *Device) Write(data []byte) (int64, error) {
|
|
d.mu.RLock()
|
|
endpoint := d.endpoint
|
|
d.mu.RUnlock()
|
|
if endpoint == nil {
|
|
return 0, syserror.EBADFD
|
|
}
|
|
if !endpoint.IsAttached() {
|
|
return 0, syserror.EIO
|
|
}
|
|
|
|
dataLen := int64(len(data))
|
|
|
|
// Packet information.
|
|
var pktInfoHdr PacketInfoHeader
|
|
if !d.hasFlags(linux.IFF_NO_PI) {
|
|
if len(data) < PacketInfoHeaderSize {
|
|
// Ignore bad packet.
|
|
return dataLen, nil
|
|
}
|
|
pktInfoHdr = PacketInfoHeader(data[:PacketInfoHeaderSize])
|
|
data = data[PacketInfoHeaderSize:]
|
|
}
|
|
|
|
// Ethernet header (TAP only).
|
|
var ethHdr header.Ethernet
|
|
if d.hasFlags(linux.IFF_TAP) {
|
|
if len(data) < header.EthernetMinimumSize {
|
|
// Ignore bad packet.
|
|
return dataLen, nil
|
|
}
|
|
ethHdr = header.Ethernet(data[:header.EthernetMinimumSize])
|
|
data = data[header.EthernetMinimumSize:]
|
|
}
|
|
|
|
// Try to determine network protocol number, default zero.
|
|
var protocol tcpip.NetworkProtocolNumber
|
|
switch {
|
|
case pktInfoHdr != nil:
|
|
protocol = pktInfoHdr.Protocol()
|
|
case ethHdr != nil:
|
|
protocol = ethHdr.Type()
|
|
}
|
|
|
|
// Try to determine remote link address, default zero.
|
|
var remote tcpip.LinkAddress
|
|
switch {
|
|
case ethHdr != nil:
|
|
remote = ethHdr.SourceAddress()
|
|
default:
|
|
remote = tcpip.LinkAddress(zeroMAC[:])
|
|
}
|
|
|
|
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
|
|
ReserveHeaderBytes: len(ethHdr),
|
|
Data: buffer.View(data).ToVectorisedView(),
|
|
})
|
|
copy(pkt.LinkHeader().Push(len(ethHdr)), ethHdr)
|
|
endpoint.InjectLinkAddr(protocol, remote, pkt)
|
|
return dataLen, nil
|
|
}
|
|
|
|
// Read reads one outgoing packet from the network interface.
|
|
func (d *Device) Read() ([]byte, error) {
|
|
d.mu.RLock()
|
|
endpoint := d.endpoint
|
|
d.mu.RUnlock()
|
|
if endpoint == nil {
|
|
return nil, syserror.EBADFD
|
|
}
|
|
|
|
for {
|
|
info, ok := endpoint.Read()
|
|
if !ok {
|
|
return nil, syserror.ErrWouldBlock
|
|
}
|
|
|
|
v, ok := d.encodePkt(&info)
|
|
if !ok {
|
|
// Ignore unsupported packet.
|
|
continue
|
|
}
|
|
return v, nil
|
|
}
|
|
}
|
|
|
|
// encodePkt encodes packet for fd side.
|
|
func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
|
|
var vv buffer.VectorisedView
|
|
|
|
// Packet information.
|
|
if !d.hasFlags(linux.IFF_NO_PI) {
|
|
hdr := make(PacketInfoHeader, PacketInfoHeaderSize)
|
|
hdr.Encode(&PacketInfoFields{
|
|
Protocol: info.Proto,
|
|
})
|
|
vv.AppendView(buffer.View(hdr))
|
|
}
|
|
|
|
// If the packet does not already have link layer header, and the route
|
|
// does not exist, we can't compute it. This is possibly a raw packet, tun
|
|
// device doesn't support this at the moment.
|
|
if info.Pkt.LinkHeader().View().IsEmpty() && info.Route.RemoteLinkAddress == "" {
|
|
return nil, false
|
|
}
|
|
|
|
// Ethernet header (TAP only).
|
|
if d.hasFlags(linux.IFF_TAP) {
|
|
// Add ethernet header if not provided.
|
|
if info.Pkt.LinkHeader().View().IsEmpty() {
|
|
d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt)
|
|
}
|
|
vv.AppendView(info.Pkt.LinkHeader().View())
|
|
}
|
|
|
|
// Append upper headers.
|
|
vv.AppendView(info.Pkt.NetworkHeader().View())
|
|
vv.AppendView(info.Pkt.TransportHeader().View())
|
|
// Append data payload.
|
|
vv.Append(info.Pkt.Data)
|
|
|
|
return vv.ToView(), true
|
|
}
|
|
|
|
// Name returns the name of the attached network interface. Empty string if
|
|
// unattached.
|
|
func (d *Device) Name() string {
|
|
d.mu.RLock()
|
|
defer d.mu.RUnlock()
|
|
if d.endpoint != nil {
|
|
return d.endpoint.name
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Flags returns the flags set for d. Zero value if unset.
|
|
func (d *Device) Flags() uint16 {
|
|
d.mu.RLock()
|
|
defer d.mu.RUnlock()
|
|
return d.flags
|
|
}
|
|
|
|
func (d *Device) hasFlags(flags uint16) bool {
|
|
return d.flags&flags == flags
|
|
}
|
|
|
|
// Readiness implements watier.Waitable.Readiness.
|
|
func (d *Device) Readiness(mask waiter.EventMask) waiter.EventMask {
|
|
if mask&waiter.EventIn != 0 {
|
|
d.mu.RLock()
|
|
endpoint := d.endpoint
|
|
d.mu.RUnlock()
|
|
if endpoint != nil && endpoint.NumQueued() == 0 {
|
|
mask &= ^waiter.EventIn
|
|
}
|
|
}
|
|
return mask & (waiter.EventIn | waiter.EventOut)
|
|
}
|
|
|
|
// WriteNotify implements channel.Notification.WriteNotify.
|
|
func (d *Device) WriteNotify() {
|
|
d.Notify(waiter.EventIn)
|
|
}
|
|
|
|
// tunEndpoint is the link endpoint for the NIC created by the tun device.
|
|
//
|
|
// It is ref-counted as multiple opening files can attach to the same NIC.
|
|
// The last owner is responsible for deleting the NIC.
|
|
type tunEndpoint struct {
|
|
tunEndpointRefs
|
|
*channel.Endpoint
|
|
|
|
stack *stack.Stack
|
|
nicID tcpip.NICID
|
|
name string
|
|
isTap bool
|
|
}
|
|
|
|
// DecRef decrements refcount of e, removing NIC if it reaches 0.
|
|
func (e *tunEndpoint) DecRef(ctx context.Context) {
|
|
e.tunEndpointRefs.DecRef(func() {
|
|
e.stack.RemoveNIC(e.nicID)
|
|
})
|
|
}
|
|
|
|
// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
|
|
func (e *tunEndpoint) ARPHardwareType() header.ARPHardwareType {
|
|
if e.isTap {
|
|
return header.ARPHardwareEther
|
|
}
|
|
return header.ARPHardwareNone
|
|
}
|
|
|
|
// AddHeader implements stack.LinkEndpoint.AddHeader.
|
|
func (e *tunEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
|
|
if !e.isTap {
|
|
return
|
|
}
|
|
eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
|
|
hdr := &header.EthernetFields{
|
|
SrcAddr: local,
|
|
DstAddr: remote,
|
|
Type: protocol,
|
|
}
|
|
if hdr.SrcAddr == "" {
|
|
hdr.SrcAddr = e.LinkAddress()
|
|
}
|
|
|
|
eth.Encode(hdr)
|
|
}
|
|
|
|
// MaxHeaderLength returns the maximum size of the link layer header.
|
|
func (e *tunEndpoint) MaxHeaderLength() uint16 {
|
|
if e.isTap {
|
|
return header.EthernetMinimumSize
|
|
}
|
|
return 0
|
|
}
|