gvisor/runsc/boot/network.go

238 lines
7.0 KiB
Go
Raw Normal View History

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boot
import (
"fmt"
"net"
"syscall"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
)
// Network exposes methods that can be used to configure a network stack.
type Network struct {
// Stack is the network stack that the methods of Network create NICs,
// addresses and routes on.
Stack *stack.Stack
}
// Route represents a route in the network stack.
//
// A Route whose fields are all nil is treated as unset; see Empty.
type Route struct {
// Destination is the destination address of the route.
Destination net.IP
// Mask is the network mask of the route.
Mask net.IPMask
// Gateway is the gateway address of the route.
Gateway net.IP
}
// DefaultRoute represents a catch all route to the default gateway.
type DefaultRoute struct {
// Route is the route to install. CreateLinksAndRoutes skips it when it is
// empty.
Route Route
// Name is the name of the interface the route is bound to. It must match
// the Name of one of the links passed to CreateLinksAndRoutes.
Name string
}
// FDBasedLink configures an fd-based link.
type FDBasedLink struct {
// Name is the name the NIC is created under.
Name string
// MTU is passed (converted to uint32) as the MTU of the fd-based endpoint.
MTU int
// Addresses are the IP addresses added to the NIC.
Addresses []net.IP
// Routes are the routes to install for this link's NIC.
Routes []Route
// GSOMaxSize is passed through to the fd-based endpoint options.
GSOMaxSize uint32
// LinkAddress is the link (MAC) address given to the fd-based endpoint.
LinkAddress net.HardwareAddr
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
NumChannels int
}
// LoopbackLink configures a loopback link.
type LoopbackLink struct {
// Name is the name the loopback NIC is created under.
Name string
// Addresses are the IP addresses added to the NIC.
Addresses []net.IP
// Routes are the routes to install for this link's NIC.
Routes []Route
}
// CreateLinksAndRoutesArgs are arguments to CreateLinksAndRoutes.
type CreateLinksAndRoutesArgs struct {
// FilePayload contains the fds associated with the FDBasedLinks. The
// number of fd's should match the sum of the NumChannels field of the
// FDBasedLink entries below.
urpc.FilePayload
// LoopbackLinks are the loopback interfaces to create. They are created
// before the fd-based links.
LoopbackLinks []LoopbackLink
// FDBasedLinks are the fd-based interfaces to create. Files from
// FilePayload are consumed in the order of this slice.
FDBasedLinks []FDBasedLink
// DefaultGateway is the default route to install; it is ignored when its
// Route is empty.
DefaultGateway DefaultRoute
}
// Empty reports whether the route is unset, that is, whether Destination,
// Mask and Gateway are all nil.
func (r *Route) Empty() bool {
	if r.Destination != nil {
		return false
	}
	if r.Mask != nil {
		return false
	}
	return r.Gateway == nil
}
// toTcpipRoute converts the route into a tcpip.Route bound to the NIC
// identified by id.
func (r *Route) toTcpipRoute(id tcpip.NICID) tcpip.Route {
	var (
		dst  = ipToAddress(r.Destination)
		gw   = ipToAddress(r.Gateway)
		mask = ipToAddressMask(net.IP(r.Mask))
	)
	return tcpip.Route{
		Destination: dst,
		Mask:        mask,
		Gateway:     gw,
		NIC:         id,
	}
}
// CreateLinksAndRoutes creates links and routes in a network stack. It should
// only be called once.
func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error {
wantFDs := 0
for _, l := range args.FDBasedLinks {
wantFDs += l.NumChannels
}
if got := len(args.FilePayload.Files); got != wantFDs {
return fmt.Errorf("args.FilePayload.Files has %d FD's but we need %d entries based on FDBasedLinks", got, wantFDs)
}
var nicID tcpip.NICID
nicids := make(map[string]tcpip.NICID)
// Collect routes from all links.
var routes []tcpip.Route
// Loopback normally appear before other interfaces.
for _, link := range args.LoopbackLinks {
nicID++
nicids[link.Name] = nicID
linkEP := loopback.New()
log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, true /* loopback */); err != nil {
return err
}
// Collect the routes from this link.
for _, r := range link.Routes {
routes = append(routes, r.toTcpipRoute(nicID))
}
}
fdOffset := 0
for _, link := range args.FDBasedLinks {
nicID++
nicids[link.Name] = nicID
FDs := []int{}
for j := 0; j < link.NumChannels; j++ {
// Copy the underlying FD.
oldFD := args.FilePayload.Files[fdOffset].Fd()
newFD, err := syscall.Dup(int(oldFD))
if err != nil {
return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
}
FDs = append(FDs, newFD)
fdOffset++
}
mac := tcpip.LinkAddress(link.LinkAddress)
linkEP, err := fdbased.New(&fdbased.Options{
FDs: FDs,
MTU: uint32(link.MTU),
EthernetHeader: true,
Address: mac,
Revert runsc to use RecvMMsg packet dispatcher. PacketMMap mode has issues due to a kernel bug. This change reverts us to using recvmmsg instead of a shared ring buffer to dispatch inbound packets. This will reduce performance but should be more stable under heavy load till PacketMMap is updated to use TPacketv3. See #210 for details. Perf difference between recvmmsg vs packetmmap. RecvMMsg : iperf3 -c 172.17.0.2 Connecting to host 172.17.0.2, port 5201 [ 4] local 172.17.0.1 port 43478 connected to 172.17.0.2 port 5201 [ ID] Interval Transfer Bandwidth Retr Cwnd [ 4] 0.00-1.00 sec 778 MBytes 6.53 Gbits/sec 4349 188 KBytes [ 4] 1.00-2.00 sec 786 MBytes 6.59 Gbits/sec 4395 212 KBytes [ 4] 2.00-3.00 sec 756 MBytes 6.34 Gbits/sec 3655 161 KBytes [ 4] 3.00-4.00 sec 782 MBytes 6.56 Gbits/sec 4419 175 KBytes [ 4] 4.00-5.00 sec 755 MBytes 6.34 Gbits/sec 4317 187 KBytes [ 4] 5.00-6.00 sec 774 MBytes 6.49 Gbits/sec 4002 173 KBytes [ 4] 6.00-7.00 sec 737 MBytes 6.18 Gbits/sec 3904 191 KBytes [ 4] 7.00-8.00 sec 530 MBytes 4.44 Gbits/sec 3318 189 KBytes [ 4] 8.00-9.00 sec 487 MBytes 4.09 Gbits/sec 2627 188 KBytes [ 4] 9.00-10.00 sec 770 MBytes 6.46 Gbits/sec 4221 170 KBytes - - - - - - - - - - - - - - - - - - - - - - - - - [ ID] Interval Transfer Bandwidth Retr [ 4] 0.00-10.00 sec 6.99 GBytes 6.00 Gbits/sec 39207 sender [ 4] 0.00-10.00 sec 6.99 GBytes 6.00 Gbits/sec receiver iperf Done. 
PacketMMap: bhaskerh@gvisor-bench:~/tensorflow$ iperf3 -c 172.17.0.2 Connecting to host 172.17.0.2, port 5201 [ 4] local 172.17.0.1 port 43496 connected to 172.17.0.2 port 5201 [ ID] Interval Transfer Bandwidth Retr Cwnd [ 4] 0.00-1.00 sec 657 MBytes 5.51 Gbits/sec 0 1.01 MBytes [ 4] 1.00-2.00 sec 1021 MBytes 8.56 Gbits/sec 0 1.01 MBytes [ 4] 2.00-3.00 sec 1.21 GBytes 10.4 Gbits/sec 45 1.01 MBytes [ 4] 3.00-4.00 sec 1018 MBytes 8.54 Gbits/sec 15 1.01 MBytes [ 4] 4.00-5.00 sec 1.28 GBytes 11.0 Gbits/sec 45 1.01 MBytes [ 4] 5.00-6.00 sec 1.38 GBytes 11.9 Gbits/sec 0 1.01 MBytes [ 4] 6.00-7.00 sec 1.34 GBytes 11.5 Gbits/sec 45 856 KBytes [ 4] 7.00-8.00 sec 1.23 GBytes 10.5 Gbits/sec 0 901 KBytes [ 4] 8.00-9.00 sec 1010 MBytes 8.48 Gbits/sec 0 923 KBytes [ 4] 9.00-10.00 sec 1.39 GBytes 11.9 Gbits/sec 0 960 KBytes - - - - - - - - - - - - - - - - - - - - - - - - - [ ID] Interval Transfer Bandwidth Retr [ 4] 0.00-10.00 sec 11.4 GBytes 9.83 Gbits/sec 150 sender [ 4] 0.00-10.00 sec 11.4 GBytes 9.83 Gbits/sec receiver Updates #210 PiperOrigin-RevId: 244968438 Change-Id: Id461b5cbff2dea6fa55cfc108ea246d8f83da20b
2019-04-24 02:06:09 +00:00
PacketDispatchMode: fdbased.RecvMMsg,
GSOMaxSize: link.GSOMaxSize,
RXChecksumOffload: true,
})
if err != nil {
return err
}
log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, false /* loopback */); err != nil {
return err
}
// Collect the routes from this link.
for _, r := range link.Routes {
routes = append(routes, r.toTcpipRoute(nicID))
}
}
if !args.DefaultGateway.Route.Empty() {
nicID, ok := nicids[args.DefaultGateway.Name]
if !ok {
return fmt.Errorf("invalid interface name %q for default route", args.DefaultGateway.Name)
}
routes = append(routes, args.DefaultGateway.Route.toTcpipRoute(nicID))
}
log.Infof("Setting routes %+v", routes)
n.Stack.SetRouteTable(routes)
return nil
}
// createNICWithAddrs registers a NIC with the given id and name in the
// network stack and assigns addresses to it.
//
// linkEP is wrapped with sniffer.New before being handed to the stack. When
// loopback is true the NIC is created with CreateNamedLoopbackNIC, otherwise
// with CreateNamedNIC. The ARP protocol address is added first, followed by
// every entry of addrs in order. The first failure is returned as an error.
func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, addrs []net.IP, loopback bool) error {
	sniffed := sniffer.New(linkEP)
	if !loopback {
		if err := n.Stack.CreateNamedNIC(id, name, sniffed); err != nil {
			return fmt.Errorf("CreateNamedNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
		}
	} else if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffed); err != nil {
		return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
	}

	// Every NIC gets the ARP address before any IP address.
	if err := n.Stack.AddAddress(id, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
		return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, arp.ProtocolNumber, arp.ProtocolAddress, err)
	}

	for i := range addrs {
		proto, tcpipAddr := ipToAddressAndProto(addrs[i])
		if err := n.Stack.AddAddress(id, proto, tcpipAddr); err != nil {
			return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, proto, tcpipAddr, err)
		}
	}
	return nil
}
// ipToAddressAndProto converts ip to a tcpip.Address together with the
// matching network protocol number.
//
// The IP version is decided with ip.To4 rather than len(ip), because a
// net.IP may hold an IPv4 address in its 16-byte form. Anything To4 rejects
// is returned unchanged and reported as IPv6.
func ipToAddressAndProto(ip net.IP) (tcpip.NetworkProtocolNumber, tcpip.Address) {
	v4 := ip.To4()
	if v4 == nil {
		return ipv6.ProtocolNumber, tcpip.Address(ip)
	}
	return ipv4.ProtocolNumber, tcpip.Address(v4)
}
// ipToAddress returns ip as a tcpip.Address, discarding the protocol number
// reported by ipToAddressAndProto.
func ipToAddress(ip net.IP) tcpip.Address {
	_, converted := ipToAddressAndProto(ip)
	return converted
}
// ipToAddressMask returns ip as a tcpip.AddressMask. The bytes are converted
// the same way as for an address via ipToAddressAndProto; the protocol number
// is discarded.
func ipToAddressMask(ip net.IP) tcpip.AddressMask {
	_, raw := ipToAddressAndProto(ip)
	mask := tcpip.AddressMask(raw)
	return mask
}