diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index e96722069..a38a3a94e 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -85,8 +85,11 @@ go_test( deps = [ "//pkg/control/server", "//pkg/log", + "//pkg/p9", "//pkg/sentry/context/contexttest", "//pkg/sentry/fs", + "//pkg/unet", + "//runsc/fsgofer", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], ) diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 28a1600cd..01da535af 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -60,28 +60,23 @@ func (p PlatformType) String() string { type FileAccessType int const ( - // FileAccessProxy sends IO requests to a Gofer process that validates the + // FileAccessShared sends IO requests to a Gofer process that validates the // requests and forwards them to the host. - FileAccessProxy FileAccessType = iota + FileAccessShared FileAccessType = iota - // FileAccessProxyExclusive is the same as FileAccessProxy, but enables + // FileAccessExclusive is the same as FileAccessShared, but enables // extra caching for improved performance. It should only be used if // the sandbox has exclusive access to the filesystem. - FileAccessProxyExclusive - - // FileAccessDirect connects the sandbox directly to the host filesystem. - FileAccessDirect + FileAccessExclusive ) // MakeFileAccessType converts type from string. func MakeFileAccessType(s string) (FileAccessType, error) { switch s { - case "proxy-shared": - return FileAccessProxy, nil - case "proxy-exclusive": - return FileAccessProxyExclusive, nil - case "direct": - return FileAccessDirect, nil + case "shared": + return FileAccessShared, nil + case "exclusive": + return FileAccessExclusive, nil default: return 0, fmt.Errorf("invalid file access type %q", s) } @@ -89,12 +84,10 @@ func MakeFileAccessType(s string) (FileAccessType, error) { func (f FileAccessType) String() string { switch f { - case FileAccessProxy: - return "proxy-shared" - case FileAccessProxyExclusive: - return "proxy-exclusive" - case FileAccessDirect: - return "direct" + case FileAccessShared: + return "shared" + case FileAccessExclusive: + return "exclusive" default: return fmt.Sprintf("unknown(%d)", f) } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index f864b1f45..1a0c426ab 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -264,45 +264,6 @@ var allowedSyscalls = seccomp.SyscallRules{ }, } -// whitelistFSFilters returns syscalls made by whitelistFS. Using WhitelistFS -// is less secure because it runs inside the Sentry and must be able to perform -// file operations that would otherwise be disabled by seccomp when a Gofer is -// used. When whitelistFS is not used, openning new FD in the Sentry is -// disallowed. -func whitelistFSFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_ACCESS: {}, - syscall.SYS_FCHMOD: {}, - syscall.SYS_FSTAT: {}, - syscall.SYS_FSYNC: {}, - syscall.SYS_FTRUNCATE: {}, - syscall.SYS_GETCWD: {}, - syscall.SYS_GETDENTS: {}, - syscall.SYS_GETDENTS64: {}, - syscall.SYS_LSEEK: {}, - syscall.SYS_LSTAT: {}, - syscall.SYS_MKDIR: {}, - syscall.SYS_MKDIRAT: {}, - syscall.SYS_NEWFSTATAT: {}, - syscall.SYS_OPEN: {}, - syscall.SYS_OPENAT: {}, - syscall.SYS_PREAD64: {}, - syscall.SYS_PWRITE64: {}, - syscall.SYS_READ: {}, - syscall.SYS_READLINK: {}, - syscall.SYS_READLINKAT: {}, - syscall.SYS_RENAMEAT: {}, - syscall.SYS_STAT: {}, - syscall.SYS_SYMLINK: {}, - syscall.SYS_SYMLINKAT: {}, - syscall.SYS_SYNC_FILE_RANGE: {}, - syscall.SYS_UNLINK: {}, - syscall.SYS_UNLINKAT: {}, - syscall.SYS_UTIMENSAT: {}, - syscall.SYS_WRITE: {}, - } -} - // hostInetFilters contains syscalls that are needed by sentry/socket/hostinet. func hostInetFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 56d30f2a0..b656883ad 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -30,7 +30,6 @@ import ( // Options are seccomp filter related options. type Options struct { Platform platform.Platform - WhitelistFS bool HostNetwork bool ControllerFD int } @@ -44,10 +43,6 @@ func Install(opt Options) error { // when not enabled. s.Merge(instrumentationFilters()) - if opt.WhitelistFS { - Report("direct file access allows unrestricted file access!") - s.Merge(whitelistFSFilters()) - } if opt.HostNetwork { Report("host networking enabled: syscall filters less restrictive!") s.Merge(hostInetFilters()) diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 772df40fe..3df276170 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -204,7 +204,7 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f ) switch conf.FileAccess { - case FileAccessProxy, FileAccessProxyExclusive: + case FileAccessShared, FileAccessExclusive: fd := fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) hostFS := mustFindFilesystem("9p") @@ -214,13 +214,6 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f return nil, fmt.Errorf("failed to generate root mount point: %v", err) } - case FileAccessDirect: - hostFS := mustFindFilesystem("whitelistfs") - rootInode, err = hostFS.Mount(ctx, rootDevice, mf, "root="+spec.Root.Path+",dont_translate_ownership=true") - if err != nil { - return nil, fmt.Errorf("failed to generate root mount point: %v", err) - } - default: return nil, fmt.Errorf("invalid file access type: %v", conf.FileAccess) } @@ -289,13 +282,10 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri case bind: switch conf.FileAccess { - case FileAccessProxy, FileAccessProxyExclusive: + case FileAccessShared, FileAccessExclusive: fd := fds.remove() fsName = "9p" opts = p9MountOptions(conf, fd) - case FileAccessDirect: - fsName = "whitelistfs" - opts = []string{"root=" + m.Source, "dont_translate_ownership=true"} default: err = fmt.Errorf("invalid file access type: %v", conf.FileAccess) } @@ -423,7 +413,7 @@ func p9MountOptions(conf *Config, fd int) []string { "wfdno=" + strconv.Itoa(fd), "privateunixsocket=true", } - if conf.FileAccess == FileAccessProxy { + if conf.FileAccess == FileAccessShared { opts = append(opts, "cache=remote_revalidating") } return opts @@ -503,9 +493,6 @@ func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, f // createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts // to the environment. func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.RestoreEnvironment, error) { - if conf.FileAccess == FileAccessDirect { - return nil, fmt.Errorf("host filesystem with whitelist not supported with S/R") - } renv := &fs.RestoreEnvironment{ MountSources: make(map[string][]fs.MountArgs), } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 540cd6188..5fb489766 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -341,7 +341,6 @@ func (l *Loader) run() error { } else { opts := filter.Options{ Platform: l.k.Platform, - WhitelistFS: l.conf.FileAccess == FileAccessDirect, HostNetwork: l.conf.Network == NetworkHost, ControllerFD: l.ctrl.srv.FD(), } diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 2396d52c8..d6bfe9ff1 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -20,14 +20,18 @@ import ( "os" "reflect" "sync" + "syscall" "testing" "time" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/control/server" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/p9" "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.googlesource.com/gvisor/runsc/fsgofer" ) func init() { @@ -39,7 +43,6 @@ func testConfig() *Config { return &Config{ RootDir: "unused_root_dir", Network: NetworkNone, - FileAccess: FileAccessDirect, DisableSeccomp: true, } } @@ -58,23 +61,62 @@ func testSpec() *specs.Spec { } } -func createLoader() (*Loader, error) { +// startGofer starts a new gofer routine serving 'root' path. It returns the +// sandbox side of the connection, and a function that when called will stop the +// gofer. +func startGofer(root string) (int, func(), error) { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return 0, nil, err + } + sandboxEnd, goferEnd := fds[0], fds[1] + + socket, err := unet.NewSocket(goferEnd) + if err != nil { + syscall.Close(sandboxEnd) + syscall.Close(goferEnd) + return 0, nil, fmt.Errorf("error creating server on FD %d: %v", goferEnd, err) + } + go func() { + at := fsgofer.NewAttachPoint(root, fsgofer.Config{ROMount: true}) + s := p9.NewServer(at) + if err := s.Handle(socket); err != nil { + log.Infof("Gofer is stopping. FD: %d, err: %v\n", goferEnd, err) + } + }() + // Closing the gofer FD will stop the gofer and exit goroutine above. + return sandboxEnd, func() { syscall.Close(goferEnd) }, nil +} + +func createLoader() (*Loader, func(), error) { fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10])) if err != nil { - return nil, err + return nil, nil, err } conf := testConfig() spec := testSpec() - return New(spec, conf, fd, nil, false) + + sandEnd, cleanup, err := startGofer(spec.Root.Path) + if err != nil { + return nil, nil, err + } + + l, err := New(spec, conf, fd, []int{sandEnd}, false) + if err != nil { + cleanup() + return nil, nil, err + } + return l, cleanup, nil } // TestRun runs a simple application in a sandbox and checks that it succeeds. func TestRun(t *testing.T) { - s, err := createLoader() + s, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } defer s.Destroy() + defer cleanup() // Start a goroutine to read the start chan result, otherwise Run will // block forever. @@ -106,11 +148,12 @@ func TestRun(t *testing.T) { // TestStartSignal tests that the controller Start message will cause // WaitForStartSignal to return. func TestStartSignal(t *testing.T) { - s, err := createLoader() + s, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } defer s.Destroy() + defer cleanup() // We aren't going to wait on this application, so the control server // needs to be shut down manually. @@ -330,7 +373,14 @@ func TestCreateMountNamespace(t *testing.T) { t.Run(tc.name, func(t *testing.T) { conf := testConfig() ctx := contexttest.Context(t) - mm, err := createMountNamespace(ctx, ctx, &tc.spec, conf, nil) + + sandEnd, cleanup, err := startGofer(tc.spec.Root.Path) + if err != nil { + t.Fatalf("failed to create gofer: %v", err) + } + defer cleanup() + + mm, err := createMountNamespace(ctx, ctx, &tc.spec, conf, []int{sandEnd}) if err != nil { t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err) } @@ -352,7 +402,6 @@ func TestRestoreEnvironment(t *testing.T) { testCases := []struct { name string spec *specs.Spec - fileAccess FileAccessType ioFDs []int errorExpected bool expectedRenv fs.RestoreEnvironment @@ -375,7 +424,6 @@ func TestRestoreEnvironment(t *testing.T) { }, }, }, - fileAccess: FileAccessProxy, ioFDs: []int{0}, errorExpected: false, expectedRenv: fs.RestoreEnvironment{ @@ -430,7 +478,6 @@ func TestRestoreEnvironment(t *testing.T) { }, }, }, - fileAccess: FileAccessProxy, ioFDs: []int{0, 1}, errorExpected: false, expectedRenv: fs.RestoreEnvironment{ @@ -489,7 +536,6 @@ func TestRestoreEnvironment(t *testing.T) { }, }, }, - fileAccess: FileAccessProxy, ioFDs: []int{0}, errorExpected: false, expectedRenv: fs.RestoreEnvironment{ @@ -534,48 +580,10 @@ func TestRestoreEnvironment(t *testing.T) { }, }, }, - { - name: "whitelist error test", - spec: &specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, - }, - Mounts: []specs.Mount{ - { - Destination: "/dev/fd-foo", - Type: "bind", - }, - }, - }, - fileAccess: FileAccessDirect, - ioFDs: []int{0, 1}, - errorExpected: true, - }, - { - name: "bad options test", - spec: &specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, - }, - Mounts: []specs.Mount{ - { - Destination: "/dev/fd-foo", - Type: "tmpfs", - Options: []string{"invalid_option=true"}, - }, - }, - }, - fileAccess: FileAccessDirect, - ioFDs: []int{0}, - errorExpected: true, - }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { conf := testConfig() - conf.FileAccess = tc.fileAccess fds := &fdDispenser{fds: tc.ioFDs} actualRenv, err := createRestoreEnvironment(tc.spec, conf, fds) if !tc.errorExpected && err != nil { diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 784baf23b..d8c7b9cd3 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -102,13 +102,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) conf := args[0].(*boot.Config) waitStatus := args[1].(*syscall.WaitStatus) - // sentry should run with a umask of 0 when --file-access=direct, because we want - // to preserve file modes exactly as set by the sentry, which will have applied - // its own umask. - if conf.FileAccess == boot.FileAccessDirect { - syscall.Umask(0) - } - if b.applyCaps { caps := spec.Process.Capabilities if caps == nil { diff --git a/runsc/container/container.go b/runsc/container/container.go index 5977fbd21..9a05a1dc5 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -615,11 +615,6 @@ func (c *Container) waitForStopped() error { } func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) { - if conf.FileAccess == boot.FileAccessDirect { - // Don't start a gofer. The sandbox will access host FS directly. - return nil, nil - } - if err := setupFS(spec, conf, bundleDir); err != nil { return nil, fmt.Errorf("failed to setup mounts: %v", err) } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 9a94347b6..c45eb79a3 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -241,7 +241,7 @@ func configs(opts ...configOption) []*boot.Config { } c.Platform = boot.PlatformKVM case nonExclusiveFS: - c.FileAccess = boot.FileAccessProxy + c.FileAccess = boot.FileAccessShared default: panic(fmt.Sprintf("unknown config option %v", o)) @@ -1368,10 +1368,10 @@ func TestAbbreviatedIDs(t *testing.T) { // Check that modifications to a volume mount are propigated into and out of // the sandbox. func TestContainerVolumeContentsShared(t *testing.T) { - // Only run this test with shared proxy, since that is the only + // Only run this test with shared file access, since that is the only // behavior it is testing. conf := testutil.TestConfig() - conf.FileAccess = boot.FileAccessProxy + conf.FileAccess = boot.FileAccessShared t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of diff --git a/runsc/main.go b/runsc/main.go index c51b199aa..c30b29b81 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -58,7 +58,7 @@ var ( // Flags that control sandbox runtime behavior. platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") - fileAccess = flag.String("file-access", "proxy-exclusive", "specifies which filesystem to use: proxy-exclusive (default), proxy-shared, or direct. Using a proxy is more secure because it disallows the sandbox from opening files directly in the host. Setting 'proxy-shared' will disable caches and should be used if external modifications to the filesystem are expected.") + fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use: exclusive (default), shared. Setting 'shared' will disable caches and should be used if external modifications to the filesystem are expected.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") multiContainer = flag.Bool("multi-container", false, "enable *experimental* multi-container support.") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") @@ -112,8 +112,8 @@ func main() { cmd.Fatalf("%v", err) } - if fsAccess == boot.FileAccessProxy && *overlay { - cmd.Fatalf("overlay flag is incompatible with proxy-shared file access") + if fsAccess == boot.FileAccessShared && *overlay { + cmd.Fatalf("overlay flag is incompatible with shared file access") } netType, err := boot.MakeNetworkType(*network) diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index f6264d5b2..697210669 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -356,12 +356,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace}) } - if conf.FileAccess == boot.FileAccessDirect { - log.Infof("Sandbox will be started in the current mount namespace") - } else { - log.Infof("Sandbox will be started in new mount namespace") - nss = append(nss, specs.LinuxNamespace{Type: specs.MountNamespace}) - } + log.Infof("Sandbox will be started in new mount namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.MountNamespace}) // Joins the network namespace if network is enabled. the sandbox talks // directly to the host network, which may have been configured in the @@ -377,9 +373,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // User namespace depends on the following options: // - Host network/filesystem: requires to run inside the user namespace // specified in the spec or the current namespace if none is configured. - // - Gofer: when using a Gofer, the sandbox process can run isolated in a - // new user namespace with only the "nobody" user and group. - if conf.Network == boot.NetworkHost || conf.FileAccess == boot.FileAccessDirect { + if conf.Network == boot.NetworkHost { if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok { log.Infof("Sandbox will be started in container's user namespace: %+v", userns) nss = append(nss, userns) diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 4f012a8ea..4d354de31 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -109,7 +109,7 @@ func TestConfig() *boot.Config { Network: boot.NetworkNone, Strace: true, MultiContainer: true, - FileAccess: boot.FileAccessProxyExclusive, + FileAccess: boot.FileAccessExclusive, TestOnlyAllowRunAsCurrentUserWithoutChroot: true, } }