Merge pull request #1975 from nybidari:iptables

PiperOrigin-RevId: 300362789
This commit is contained in:
gVisor bot 2020-03-11 11:02:04 -07:00
commit 2c2622b942
8 changed files with 307 additions and 7 deletions

View File

@ -253,6 +253,50 @@ type XTErrorTarget struct {
// SizeOfXTErrorTarget is the size of an XTErrorTarget.
const SizeOfXTErrorTarget = 64
// Flag bits for NfNATIPV4Range.Flags. They indicate whether the mapping
// applies to IP addresses or to the protocol-specific part (ports). The values
// mirror include/uapi/linux/netfilter/nf_nat.h.
const (
	NF_NAT_RANGE_MAP_IPS            = 0x01 // bit 0
	NF_NAT_RANGE_PROTO_SPECIFIED    = 0x02 // bit 1
	NF_NAT_RANGE_PROTO_RANDOM       = 0x04 // bit 2
	NF_NAT_RANGE_PERSISTENT         = 0x08 // bit 3
	NF_NAT_RANGE_PROTO_RANDOM_FULLY = 0x10 // bit 4

	// NF_NAT_RANGE_PROTO_RANDOM_ALL combines both PROTO_RANDOM variants.
	NF_NAT_RANGE_PROTO_RANDOM_ALL = NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY

	// NF_NAT_RANGE_MASK is the union of every flag bit defined above.
	NF_NAT_RANGE_MASK = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |
		NF_NAT_RANGE_PERSISTENT | NF_NAT_RANGE_PROTO_RANDOM_ALL
)
// NfNATIPV4Range corresponds to struct nf_nat_ipv4_range
// in include/uapi/linux/netfilter/nf_nat.h. The fields are in
// network byte order.
//
// The struct occupies 16 bytes (4 + 4 + 4 + 2 + 2), and the field order is
// part of the binary layout exchanged with userspace, so it must not change.
//
// NOTE(review): in the kernel header only the addresses and ports are
// declared big-endian; flags is a plain unsigned int. Confirm before relying
// on the byte-order statement above for Flags.
type NfNATIPV4Range struct {
// Flags is a bitmask of the NF_NAT_RANGE_* values.
Flags uint32
// MinIP is the lower bound of the IPv4 address range.
MinIP [4]byte
// MaxIP is the upper bound of the IPv4 address range.
MaxIP [4]byte
// MinPort is the lower bound of the port range.
MinPort uint16
// MaxPort is the upper bound of the port range.
MaxPort uint16
}
// NfNATIPV4MultiRangeCompat corresponds to struct
// nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h.
//
// The struct occupies 20 bytes: a 4-byte count followed by one 16-byte range.
type NfNATIPV4MultiRangeCompat struct {
// RangeSize is presumably the number of ranges carried; the REDIRECT
// target parser in this change rejects any value other than 1.
RangeSize uint32
// RangeIPV4 is the single range embedded in the struct.
RangeIPV4 NfNATIPV4Range
}
// XTRedirectTarget triggers a redirect when reached.
// Adding 4 bytes of padding to make the struct 8 byte aligned.
//
// The field order is the serialized layout: the generic target header, then
// the NAT range description, then padding.
type XTRedirectTarget struct {
// Target is the common target header (name and total size).
Target XTEntryTarget
// NfRange describes the address/port range to redirect to (20 bytes).
NfRange NfNATIPV4MultiRangeCompat
// Explicit trailing padding; see the type comment.
_ [4]byte
}
// SizeOfXTRedirectTarget is the size of an XTRedirectTarget.
// NOTE(review): 56 = size of XTEntryTarget + 20 (NfRange) + 4 (padding), which
// implies XTEntryTarget is 32 bytes; XTEntryTarget is defined elsewhere, so
// confirm against its definition.
const SizeOfXTRedirectTarget = 56
// IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds
// to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h.
type IPTGetinfo struct {

View File

@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/iptables"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/usermem"
@ -35,6 +36,11 @@ import (
// shouldn't be reached - an error has occurred if we fall through to one.
const errorTargetName = "ERROR"
// redirectTargetName is the name used to mark targets as redirect targets.
// Redirect targets should only be reached in the NAT and Mangle tables. They
// change the destination port and/or destination IP of packets.
const redirectTargetName = "REDIRECT"
// Metadata is used to verify that we are correctly serializing and
// deserializing iptables into structs consumable by the iptables tool. We save
// a metadata struct when the tables are written, and when they are read out we
@ -240,6 +246,8 @@ func marshalTarget(target iptables.Target) []byte {
return marshalErrorTarget(tg.Name)
case iptables.ReturnTarget:
return marshalStandardTarget(iptables.RuleReturn)
case iptables.RedirectTarget:
return marshalRedirectTarget()
case JumpTarget:
return marshalJumpTarget(tg)
default:
@ -276,6 +284,19 @@ func marshalErrorTarget(errorName string) []byte {
return binary.Marshal(ret, usermem.ByteOrder, target)
}
// marshalRedirectTarget serializes a REDIRECT target into its binary
// XTRedirectTarget form. Only the target name and total size are populated;
// the NAT range portion is emitted as zeroes.
func marshalRedirectTarget() []byte {
	var rt linux.XTRedirectTarget
	rt.Target.TargetSize = linux.SizeOfXTRedirectTarget
	copy(rt.Target.Name[:], redirectTargetName)

	// Pre-size the output buffer to the exact size of the serialized target.
	out := make([]byte, 0, linux.SizeOfXTRedirectTarget)
	return binary.Marshal(out, usermem.ByteOrder, rt)
}
func marshalJumpTarget(jt JumpTarget) []byte {
nflog("convert to binary: marshalling jump target")
@ -345,6 +366,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
switch replace.Name.String() {
case iptables.TablenameFilter:
table = iptables.EmptyFilterTable()
case iptables.TablenameNat:
table = iptables.EmptyNatTable()
default:
nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())
return syserr.ErrInvalidArgument
@ -404,7 +427,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
return syserr.ErrInvalidArgument
}
target, err := parseTarget(optVal[:targetSize])
target, err := parseTarget(filter, optVal[:targetSize])
if err != nil {
nflog("failed to parse target: %v", err)
return syserr.ErrInvalidArgument
@ -495,10 +518,11 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
}
// TODO(gvisor.dev/issue/170): Support other chains.
// Since we only support modifying the INPUT chain right now, make sure
// all other chains point to ACCEPT rules.
// Since we only support modifying the INPUT chain and redirect for
// PREROUTING chain right now, make sure all other chains point to
// ACCEPT rules.
for hook, ruleIdx := range table.BuiltinChains {
if hook != iptables.Input {
if hook != iptables.Input && hook != iptables.Prerouting {
if _, ok := table.Rules[ruleIdx].Target.(iptables.AcceptTarget); !ok {
nflog("hook %d is unsupported.", hook)
return syserr.ErrInvalidArgument
@ -570,7 +594,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma
// parseTarget parses a target from optVal. optVal should contain only the
// target.
func parseTarget(optVal []byte) (iptables.Target, error) {
func parseTarget(filter iptables.IPHeaderFilter, optVal []byte) (iptables.Target, error) {
nflog("set entries: parsing target of size %d", len(optVal))
if len(optVal) < linux.SizeOfXTEntryTarget {
return nil, fmt.Errorf("optVal has insufficient size for entry target %d", len(optVal))
@ -620,6 +644,55 @@ func parseTarget(optVal []byte) (iptables.Target, error) {
nflog("set entries: user-defined target %q", name)
return iptables.UserChainTarget{Name: name}, nil
}
case redirectTargetName:
// Redirect target.
if len(optVal) < linux.SizeOfXTRedirectTarget {
return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal))
}
if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber {
return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
}
var redirectTarget linux.XTRedirectTarget
buf = optVal[:linux.SizeOfXTRedirectTarget]
binary.Unmarshal(buf, usermem.ByteOrder, &redirectTarget)
// Copy linux.XTRedirectTarget to iptables.RedirectTarget.
var target iptables.RedirectTarget
nfRange := redirectTarget.NfRange
// RangeSize should be 1.
if nfRange.RangeSize != 1 {
return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
}
// TODO(gvisor.dev/issue/170): Check if the flags are valid.
// Also check if we need to map ports or IP.
// For now, redirect target only supports destination port change.
// Port range and IP range are not supported yet.
if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 {
return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
}
target.RangeProtoSpecified = true
target.MinIP = tcpip.Address(nfRange.RangeIPV4.MinIP[:])
target.MaxIP = tcpip.Address(nfRange.RangeIPV4.MaxIP[:])
// TODO(gvisor.dev/issue/170): Port range is not supported yet.
if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort {
return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
}
// Convert port from big endian to little endian.
port := make([]byte, 2)
binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MinPort)
target.MinPort = binary.LittleEndian.Uint16(port)
binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MaxPort)
target.MaxPort = binary.LittleEndian.Uint16(port)
return target, nil
}
// Unknown target.

View File

@ -135,6 +135,27 @@ func EmptyFilterTable() Table {
}
}
// EmptyNatTable returns a Table with no rules and the NAT table's built-in
// chains (Prerouting, Input, Output and Postrouting) mapped to HookUnset, both
// as chain entry points and as underflows.
func EmptyNatTable() Table {
	// unsetHooks builds a fresh map on every call so that BuiltinChains and
	// Underflows never share storage.
	unsetHooks := func() map[Hook]int {
		return map[Hook]int{
			Prerouting:  HookUnset,
			Input:       HookUnset,
			Output:      HookUnset,
			Postrouting: HookUnset,
		}
	}

	return Table{
		Rules:         []Rule{},
		BuiltinChains: unsetHooks(),
		Underflows:    unsetHooks(),
		UserChains:    map[string]int{},
	}
}
// A chainVerdict is what a table decides should be done with a packet.
type chainVerdict int
@ -240,6 +261,12 @@ func (it *IPTables) checkChain(hook Hook, pkt tcpip.PacketBuffer, table Table, r
func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) {
rule := table.Rules[ruleIdx]
// If pkt.NetworkHeader hasn't been set yet, it will be contained in
// pkt.Data.First().
if pkt.NetworkHeader == nil {
pkt.NetworkHeader = pkt.Data.First()
}
// First check whether the packet matches the IP header filter.
// TODO(gvisor.dev/issue/170): Support other fields of the filter.
if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() {

View File

@ -17,6 +17,7 @@ package iptables
import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
// AcceptTarget accepts packets.
@ -63,3 +64,81 @@ type ReturnTarget struct{}
func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) {
// The packet argument is ignored; the verdict is unconditionally RuleReturn.
// NOTE(review): the int result is always 0 here; its meaning is defined by
// the Target interface, which is not visible in this hunk.
return RuleReturn, 0
}
// RedirectTarget redirects the packet by modifying the destination port/IP.
// Min and Max values for IP and Ports in the struct indicate the range of
// values which can be used to redirect.
//
// The Action method in this file currently uses only MinPort; the remaining
// fields are stored but not consulted by it.
type RedirectTarget struct {
// TODO(gvisor.dev/issue/170): Other flags need to be added after
// we support them.
// RangeProtoSpecified indicates that a single port is specified to
// redirect to.
RangeProtoSpecified bool
// MinIP is the minimum address used to redirect.
MinIP tcpip.Address
// MaxIP is the maximum address used to redirect.
MaxIP tcpip.Address
// MinPort is the minimum port used to redirect.
MinPort uint16
// MaxPort is the maximum port used to redirect.
MaxPort uint16
}
// Action implements Target.Action.
//
// It rewrites the destination port of a UDP or TCP packet to rt.MinPort and
// returns RuleAccept. Packets too short to hold an IPv4 header or the relevant
// transport header, and packets carrying any other transport protocol, yield
// RuleDrop. The int result is always 0.
//
// TODO(gvisor.dev/issue/170): Parse headers without copying. The current
// implementation only works for PREROUTING and calls pkt.Clone(), neither
// of which should be the case.
func (rt RedirectTarget) Action(pkt tcpip.PacketBuffer) (RuleVerdict, int) {
	newPkt := pkt.Clone()

	// Set network header. Check the length first so that a truncated packet
	// is dropped instead of panicking in the slice expression or in the
	// header accessors below.
	headerView := newPkt.Data.First()
	if len(headerView) < header.IPv4MinimumSize {
		return RuleDrop, 0
	}
	netHeader := header.IPv4(headerView)
	newPkt.NetworkHeader = headerView[:header.IPv4MinimumSize]

	// Strip the IP header so that newPkt.Data starts at the transport header,
	// and cap the payload to the length announced by the IP header.
	hlen := int(netHeader.HeaderLength())
	tlen := int(netHeader.TotalLength())
	newPkt.Data.TrimFront(hlen)
	newPkt.Data.CapLength(tlen - hlen)

	// TODO(gvisor.dev/issue/170): Change destination address to
	// loopback or interface address on which the packet was
	// received.

	// TODO(gvisor.dev/issue/170): Check Flags in RedirectTarget if
	// we need to change dest address (for OUTPUT chain) or ports.

	// Use the already-parsed transport header when there is one; otherwise it
	// is at the front of the trimmed data. The length checks below must look
	// at this view (not at pkt.Data, which still begins with the IP header).
	transportHeader := newPkt.TransportHeader
	if transportHeader == nil {
		transportHeader = newPkt.Data.First()
	}

	switch netHeader.TransportProtocol() {
	case header.UDPProtocolNumber:
		if len(transportHeader) < header.UDPMinimumSize {
			return RuleDrop, 0
		}
		header.UDP(transportHeader).SetDestinationPort(rt.MinPort)
	case header.TCPProtocolNumber:
		if len(transportHeader) < header.TCPMinimumSize {
			return RuleDrop, 0
		}
		// TODO(gvisor.dev/issue/170): Need to recompute checksum
		// and implement nat connection tracking to support TCP.
		header.TCP(transportHeader).SetDestinationPort(rt.MinPort)
	default:
		return RuleDrop, 0
	}

	// NOTE(review): the port is written through newPkt, which is discarded on
	// return; this presumably relies on Clone sharing the underlying bytes
	// with pkt. Confirm against PacketBuffer.Clone.
	return RuleAccept, 0
}

View File

@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/iptables"
)
var ipv4BroadcastAddr = tcpip.ProtocolAddress{
@ -1116,6 +1117,7 @@ func (n *NIC) isInGroup(addr tcpip.Address) bool {
func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) {
	// Build the route describing this delivery and record the link address
	// of the sender on it.
	route := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
	route.RemoteLinkAddress = remotelinkAddr

	// Hand the packet to the network endpoint, then release ref.
	ref.ep.HandlePacket(&route, pkt)
	ref.decRef()
}
@ -1186,6 +1188,16 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
n.stack.stats.IP.InvalidSourceAddressesReceived.Increment()
return
}
// TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet.
if protocol == header.IPv4ProtocolNumber {
ipt := n.stack.IPTables()
if ok := ipt.Check(iptables.Prerouting, pkt); !ok {
// iptables is telling us to drop the packet.
return
}
}
if ref := n.getRef(protocol, dst); ref != nil {
handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt)
return

View File

@ -196,12 +196,24 @@ func TestNATRedirectUDPPort(t *testing.T) {
}
}
// TestNATRedirectTCPPort runs the NATRedirectTCPPort test case and fails the
// test on any error.
func TestNATRedirectTCPPort(t *testing.T) {
	err := singleTest(NATRedirectTCPPort{})
	if err != nil {
		t.Fatal(err)
	}
}
// TestNATDropUDP runs the NATDropUDP test case and fails the test on any
// error.
func TestNATDropUDP(t *testing.T) {
	err := singleTest(NATDropUDP{})
	if err != nil {
		t.Fatal(err)
	}
}
// TestNATAcceptAll runs the NATAcceptAll test case and fails the test on any
// error.
func TestNATAcceptAll(t *testing.T) {
	err := singleTest(NATAcceptAll{})
	if err != nil {
		t.Fatal(err)
	}
}
func TestFilterInputDropTCPDestPort(t *testing.T) {
if err := singleTest(FilterInputDropTCPDestPort{}); err != nil {
t.Fatal(err)

View File

@ -25,7 +25,9 @@ const (
func init() {
RegisterTestCase(NATRedirectUDPPort{})
RegisterTestCase(NATRedirectTCPPort{})
RegisterTestCase(NATDropUDP{})
RegisterTestCase(NATAcceptAll{})
}
// NATRedirectUDPPort tests that packets are redirected to different port.
@ -45,6 +47,7 @@ func (NATRedirectUDPPort) ContainerAction(ip net.IP) error {
if err := listenUDP(redirectPort, sendloopDuration); err != nil {
return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err)
}
return nil
}
@ -53,6 +56,29 @@ func (NATRedirectUDPPort) LocalAction(ip net.IP) error {
return sendUDPLoop(ip, acceptPort, sendloopDuration)
}
// NATRedirectTCPPort tests that connections are redirected on specified ports.
type NATRedirectTCPPort struct{}
// Name implements TestCase.Name.
func (NATRedirectTCPPort) Name() string {
return "NATRedirectTCPPort"
}
// ContainerAction implements TestCase.ContainerAction.
func (NATRedirectTCPPort) ContainerAction(ip net.IP) error {
if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil {
return err
}
// Listen for TCP packets on redirect port.
return listenTCP(redirectPort, sendloopDuration)
}
// LocalAction implements TestCase.LocalAction.
func (NATRedirectTCPPort) LocalAction(ip net.IP) error {
return connectTCP(ip, dropPort, acceptPort, sendloopDuration)
}
// NATDropUDP tests that packets are not received on ports other than the redirect port.
type NATDropUDP struct{}
@ -78,3 +104,29 @@ func (NATDropUDP) ContainerAction(ip net.IP) error {
func (NATDropUDP) LocalAction(ip net.IP) error {
return sendUDPLoop(ip, acceptPort, sendloopDuration)
}
// NATAcceptAll tests that all UDP packets are accepted when the NAT table's
// PREROUTING chain has an ACCEPT rule for UDP.
type NATAcceptAll struct{}

// Name implements TestCase.Name.
func (NATAcceptAll) Name() string {
	const name = "NATAcceptAll"
	return name
}

// ContainerAction implements TestCase.ContainerAction. It installs the ACCEPT
// rule and then expects UDP traffic to arrive on acceptPort.
func (NATAcceptAll) ContainerAction(ip net.IP) error {
	if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil {
		return err
	}

	err := listenUDP(acceptPort, sendloopDuration)
	if err == nil {
		return nil
	}
	return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err)
}

// LocalAction implements TestCase.LocalAction. It sends UDP packets to
// acceptPort on the container for sendloopDuration.
func (NATAcceptAll) LocalAction(ip net.IP) error {
	return sendUDPLoop(ip, acceptPort, sendloopDuration)
}

View File

@ -420,8 +420,9 @@ TEST_P(CookedPacketTest, BindDrop) {
// Bind with invalid address.
TEST_P(CookedPacketTest, BindFail) {
// Null address.
ASSERT_THAT(bind(socket_, nullptr, sizeof(struct sockaddr)),
SyscallFailsWithErrno(EFAULT));
ASSERT_THAT(
bind(socket_, nullptr, sizeof(struct sockaddr)),
AnyOf(SyscallFailsWithErrno(EFAULT), SyscallFailsWithErrno(EINVAL)));
// Address of size 1.
uint8_t addr = 0;