From 29be908ab69d2d333572f6990d331e494b1e51fd Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 18 Mar 2021 12:11:59 -0700 Subject: [PATCH 1/2] Address post submit comments for fs benchmarks. Also, drop fio total reads/writes to 1GB as 10GB is prohibitively slow. PiperOrigin-RevId: 363714060 --- .buildkite/pipeline.yaml | 4 ++-- test/benchmarks/fs/BUILD | 2 ++ test/benchmarks/fs/bazel_test.go | 23 +++++++++++---------- test/benchmarks/fs/fio_test.go | 20 +++++++++++++------ test/benchmarks/harness/BUILD | 1 + test/benchmarks/harness/util.go | 34 ++++++++++++++++++-------------- 6 files changed, 51 insertions(+), 33 deletions(-) diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml index aa2fd1f47..98526dcec 100644 --- a/.buildkite/pipeline.yaml +++ b/.buildkite/pipeline.yaml @@ -186,10 +186,10 @@ steps: # For fio, running with --test.benchtime=Xs scales the written/read # bytes to several GB. This is not a problem for root/bind/volume mounts, # but for tmpfs mounts, the size can grow to more memory than the machine - # has availabe. Fix the runs to 10GB written/read for the benchmark. + # has availabe. Fix the runs to 1GB written/read for the benchmark. - <<: *benchmarks label: ":floppy_disk: FIO benchmarks" - command: make benchmark-platforms BENCHMARKS_SUITE=fio BENCHMARKS_TARGETS=test/benchmarks/fs:fio_test BENCHMARKS_OPTIONS=--test.benchtime=10000x + command: make benchmark-platforms BENCHMARKS_SUITE=fio BENCHMARKS_TARGETS=test/benchmarks/fs:fio_test BENCHMARKS_OPTIONS=--test.benchtime=1000x - <<: *benchmarks label: ":globe_with_meridians: HTTPD benchmarks" command: make benchmark-platforms BENCHMARKS_FILTER="Continuous" BENCHMARKS_SUITE=httpd BENCHMARKS_TARGETS=test/benchmarks/network:httpd_test diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index c94caab60..dc82e63b2 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -8,6 +8,7 @@ benchmark_test( srcs = ["bazel_test.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/test/dockerutil", "//test/benchmarks/harness", "//test/benchmarks/tools", @@ -21,6 +22,7 @@ benchmark_test( srcs = ["fio_test.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/test/dockerutil", "//test/benchmarks/harness", "//test/benchmarks/tools", diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index 7ced963f6..797b1952d 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -20,6 +20,7 @@ import ( "strings" "testing" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" "gvisor.dev/gvisor/test/benchmarks/tools" @@ -28,8 +29,8 @@ import ( // Dimensions here are clean/dirty cache (do or don't drop caches) // and if the mount on which we are compiling is a tmpfs/bind mount. type benchmark struct { - clearCache bool // clearCache drops caches before running. - fstype string // type of filesystem to use. + clearCache bool // clearCache drops caches before running. + fstype harness.FileSystemType // type of filesystem to use. } // Note: CleanCache versions of this test require running with root permissions. @@ -48,12 +49,12 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) { // Get a machine from the Harness on which to run. machine, err := harness.GetMachine() if err != nil { - b.Fatalf("failed to get machine: %v", err) + b.Fatalf("Failed to get machine: %v", err) } defer machine.CleanUp() benchmarks := make([]benchmark, 0, 6) - for _, filesys := range []string{harness.BindFS, harness.TmpFS, harness.RootFS} { + for _, filesys := range []harness.FileSystemType{harness.BindFS, harness.TmpFS, harness.RootFS} { benchmarks = append(benchmarks, benchmark{ clearCache: true, fstype: filesys, @@ -75,7 +76,7 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) { filesystem := tools.Parameter{ Name: "filesystem", - Value: bm.fstype, + Value: string(bm.fstype), } name, err := tools.ParametersToName(pageCache, filesystem) if err != nil { @@ -86,13 +87,14 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) { // Grab a container. ctx := context.Background() container := machine.GetContainer(ctx, b) - defer container.CleanUp(ctx) - - mts, prefix, cleanup, err := harness.MakeMount(machine, bm.fstype) + cu := cleanup.Make(func() { + container.CleanUp(ctx) + }) + defer cu.Clean() + mts, prefix, err := harness.MakeMount(machine, bm.fstype, &cu) if err != nil { b.Fatalf("Failed to make mount: %v", err) } - defer cleanup() runOpts := dockerutil.RunOpts{ Image: image, @@ -104,8 +106,9 @@ func runBuildBenchmark(b *testing.B, image, workdir, target string) { b.Fatalf("run failed with: %v", err) } + cpCmd := fmt.Sprintf("mkdir -p %s && cp -r %s %s/.", prefix, workdir, prefix) if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, - "cp", "-rf", workdir, prefix+"/."); err != nil { + "/bin/sh", "-c", cpCmd); err != nil { b.Fatalf("failed to copy directory: %v (%s)", err, out) } diff --git a/test/benchmarks/fs/fio_test.go b/test/benchmarks/fs/fio_test.go index f783a2b33..1482466f4 100644 --- a/test/benchmarks/fs/fio_test.go +++ b/test/benchmarks/fs/fio_test.go @@ -21,6 +21,7 @@ import ( "strings" "testing" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" "gvisor.dev/gvisor/test/benchmarks/tools" @@ -69,7 +70,7 @@ func BenchmarkFio(b *testing.B) { } defer machine.CleanUp() - for _, fsType := range []string{harness.BindFS, harness.TmpFS, harness.RootFS} { + for _, fsType := range []harness.FileSystemType{harness.BindFS, harness.TmpFS, harness.RootFS} { for _, tc := range testCases { operation := tools.Parameter{ Name: "operation", @@ -81,7 +82,7 @@ func BenchmarkFio(b *testing.B) { } filesystem := tools.Parameter{ Name: "filesystem", - Value: fsType, + Value: string(fsType), } name, err := tools.ParametersToName(operation, blockSize, filesystem) if err != nil { @@ -90,15 +91,18 @@ func BenchmarkFio(b *testing.B) { b.Run(name, func(b *testing.B) { b.StopTimer() tc.Size = b.N + ctx := context.Background() container := machine.GetContainer(ctx, b) - defer container.CleanUp(ctx) + cu := cleanup.Make(func() { + container.CleanUp(ctx) + }) + defer cu.Clean() - mnts, outdir, mountCleanup, err := harness.MakeMount(machine, fsType) + mnts, outdir, err := harness.MakeMount(machine, fsType, &cu) if err != nil { b.Fatalf("failed to make mount: %v", err) } - defer mountCleanup() // Start the container with the mount. if err := container.Spawn( @@ -112,6 +116,11 @@ func BenchmarkFio(b *testing.B) { b.Fatalf("failed to start fio container with: %v", err) } + if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, + "mkdir", "-p", outdir); err != nil { + b.Fatalf("failed to copy directory: %v (%s)", err, out) + } + // Directory and filename inside container where fio will read/write. outfile := filepath.Join(outdir, "test.txt") @@ -130,7 +139,6 @@ func BenchmarkFio(b *testing.B) { } cmd := tc.MakeCmd(outfile) - if err := harness.DropCaches(machine); err != nil { b.Fatalf("failed to drop caches: %v", err) } diff --git a/test/benchmarks/harness/BUILD b/test/benchmarks/harness/BUILD index 116610938..367316661 100644 --- a/test/benchmarks/harness/BUILD +++ b/test/benchmarks/harness/BUILD @@ -12,6 +12,7 @@ go_library( ], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/test/dockerutil", "//pkg/test/testutil", "@com_github_docker_docker//api/types/mount:go_default_library", diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go index 36abe1069..f7e569751 100644 --- a/test/benchmarks/harness/util.go +++ b/test/benchmarks/harness/util.go @@ -22,6 +22,7 @@ import ( "testing" "github.com/docker/docker/api/types/mount" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/pkg/test/testutil" ) @@ -58,52 +59,55 @@ func DebugLog(b *testing.B, msg string, args ...interface{}) { } } +// FileSystemType represents a type container mount. +type FileSystemType string + const ( // BindFS indicates a bind mount should be created. - BindFS = "bindfs" + BindFS FileSystemType = "bindfs" // TmpFS indicates a tmpfs mount should be created. - TmpFS = "tmpfs" + TmpFS FileSystemType = "tmpfs" // RootFS indicates no mount should be created and the root mount should be used. - RootFS = "rootfs" + RootFS FileSystemType = "rootfs" ) // MakeMount makes a mount and cleanup based on the requested type. Bind // and volume mounts are backed by a temp directory made with mktemp. // tmpfs mounts require no such backing and are just made. // rootfs mounts do not make a mount, but instead return a target direectory at root. -// It is up to the caller to call the returned cleanup. -func MakeMount(machine Machine, fsType string) ([]mount.Mount, string, func(), error) { +// It is up to the caller to call Clean on the passed *cleanup.Cleanup +func MakeMount(machine Machine, fsType FileSystemType, cu *cleanup.Cleanup) ([]mount.Mount, string, error) { mounts := make([]mount.Mount, 0, 1) + target := "/data" switch fsType { case BindFS: dir, err := machine.RunCommand("mktemp", "-d") if err != nil { - return mounts, "", func() {}, fmt.Errorf("failed to create tempdir: %v", err) + return mounts, "", fmt.Errorf("failed to create tempdir: %v", err) } dir = strings.TrimSuffix(dir, "\n") - + cu.Add(func() { + machine.RunCommand("rm", "-rf", dir) + }) out, err := machine.RunCommand("chmod", "777", dir) if err != nil { - machine.RunCommand("rm", "-rf", dir) - return mounts, "", func() {}, fmt.Errorf("failed modify directory: %v %s", err, out) + return mounts, "", fmt.Errorf("failed modify directory: %v %s", err, out) } - target := "/data" mounts = append(mounts, mount.Mount{ Target: target, Source: dir, Type: mount.TypeBind, }) - return mounts, target, func() { machine.RunCommand("rm", "-rf", dir) }, nil + return mounts, target, nil case RootFS: - return mounts, "/", func() {}, nil + return mounts, target, nil case TmpFS: - target := "/data" mounts = append(mounts, mount.Mount{ Target: target, Type: mount.TypeTmpfs, }) - return mounts, target, func() {}, nil + return mounts, target, nil default: - return mounts, "", func() {}, fmt.Errorf("illegal mount type not supported: %v", fsType) + return mounts, "", fmt.Errorf("illegal mount type not supported: %v", fsType) } } From 7fac7e32f3a866134bcee499dfc64459946dfe9d Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 18 Mar 2021 12:14:01 -0700 Subject: [PATCH 2/2] Translate syserror when validating partial IO errors syserror allows packages to register translators for errors. These translators should be called prior to checking if the error is valid, otherwise it may not account for possible errors that can be returned from different packages, e.g. safecopy.BusError => syserror.EFAULT. Second attempt, it passes tests now :-) PiperOrigin-RevId: 363714508 --- pkg/sentry/syscalls/linux/error.go | 26 ++++++---- pkg/syserror/syserror.go | 10 ++-- test/syscalls/linux/BUILD | 3 ++ test/syscalls/linux/read.cc | 40 +++++++++++++++ test/syscalls/linux/write.cc | 78 ++++++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 15 deletions(-) diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 5bd526b73..efec93f73 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -75,17 +75,25 @@ func handleIOError(ctx context.Context, partialResult bool, ioerr, intr error, o // errors, we may consume the error and return only the partial read/write. // // Returns false if error is unknown. -func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, op string) (bool, error) { - switch err { - case nil: +func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr error, op string) (bool, error) { + if errOrig == nil { // Typical successful syscall. return true, nil + } + + // Translate error, if possible, to consolidate errors from other packages + // into a smaller set of errors from syserror package. + translatedErr := errOrig + if errno, ok := syserror.TranslateError(errOrig); ok { + translatedErr = errno + } + switch translatedErr { case io.EOF: // EOF is always consumed. If this is a partial read/write // (result != 0), the application will see that, otherwise // they will see 0. return true, nil - case syserror.ErrExceedsFileSizeLimit: + case syserror.EFBIG: t := kernel.TaskFromContext(ctx) if t == nil { panic("I/O error should only occur from a context associated with a Task") @@ -98,7 +106,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, // Simultaneously send a SIGXFSZ per setrlimit(2). t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t)) return true, syserror.EFBIG - case syserror.ErrInterrupted: + case syserror.EINTR: // The syscall was interrupted. Return nil if it completed // partially, otherwise return the error code that the syscall // needs (to indicate to the kernel what it should do). @@ -110,10 +118,10 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, if !partialResult { // Typical syscall error. - return true, err + return true, errOrig } - switch err { + switch translatedErr { case syserror.EINTR: // Syscall interrupted, but completed a partial // read/write. Like ErrWouldBlock, since we have a @@ -143,7 +151,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, // For TCP sendfile connections, we may have a reset or timeout. But we // should just return n as the result. return true, nil - case syserror.ErrWouldBlock: + case syserror.EWOULDBLOCK: // Syscall would block, but completed a partial read/write. // This case should only be returned by IssueIO for nonblocking // files. Since we have a partial read/write, we consume @@ -151,7 +159,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, return true, nil } - switch err.(type) { + switch errOrig.(type) { case syserror.SyscallRestartErrno: // Identical to the EINTR case. return true, nil diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go index 97de17afe..56b621357 100644 --- a/pkg/syserror/syserror.go +++ b/pkg/syserror/syserror.go @@ -130,17 +130,15 @@ func AddErrorUnwrapper(unwrap func(e error) (unix.Errno, bool)) { // TranslateError translates errors to errnos, it will return false if // the error was not registered. func TranslateError(from error) (unix.Errno, bool) { - err, ok := errorMap[from] - if ok { - return err, ok + if err, ok := errorMap[from]; ok { + return err, true } // Try to unwrap the error if we couldn't match an error // exactly. This might mean that a package has its own // error type. for _, unwrap := range errorUnwrappers { - err, ok := unwrap(from) - if ok { - return err, ok + if err, ok := unwrap(from); ok { + return err, true } } return 0, false diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 037181f7e..043ada583 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1922,7 +1922,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", gtest, + "//test/util:cleanup", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", @@ -3991,6 +3993,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/base:core_headers", gtest, "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc index 98d5e432d..087262535 100644 --- a/test/syscalls/linux/read.cc +++ b/test/syscalls/linux/read.cc @@ -13,11 +13,14 @@ // limitations under the License. #include +#include #include #include #include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -121,6 +124,43 @@ TEST_F(ReadTest, ReadWithOpath) { EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallFailsWithErrno(EBADF)); } +// Test that partial writes that hit SIGSEGV are correctly handled and return +// partial write. +TEST_F(ReadTest, PartialReadSIGSEGV) { + // Allocate 2 pages and remove permission from the second. + const size_t size = 2 * kPageSize; + void* addr = + mmap(0, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(name_.c_str(), O_RDWR, 0666)); + for (size_t i = 0; i < 2; i++) { + EXPECT_THAT(pwrite(fd.get(), addr, size, 0), + SyscallSucceedsWithValue(size)); + } + + void* badAddr = reinterpret_cast(addr) + kPageSize; + ASSERT_THAT(mprotect(badAddr, kPageSize, PROT_NONE), SyscallSucceeds()); + + // Attempt to read to both pages. Create a non-contiguous iovec pair to + // ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + EXPECT_THAT(preadv(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(size)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc index 740992d0a..3373ba72b 100644 --- a/test/syscalls/linux/write.cc +++ b/test/syscalls/linux/write.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/macros.h" #include "test/util/cleanup.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -256,6 +258,82 @@ TEST_F(WriteTest, PwriteWithOpath) { SyscallFailsWithErrno(EBADF)); } +// Test that partial writes that hit SIGSEGV are correctly handled and return +// partial write. +TEST_F(WriteTest, PartialWriteSIGSEGV) { + // Allocate 2 pages and remove permission from the second. + const size_t size = 2 * kPageSize; + void* addr = mmap(0, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + void* badAddr = reinterpret_cast(addr) + kPageSize; + ASSERT_THAT(mprotect(badAddr, kPageSize, PROT_NONE), SyscallSucceeds()); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_WRONLY)); + + // Attempt to write both pages to the file. Create a non-contiguous iovec pair + // to ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + // Write should succeed for the first iovec and half of the second (=2 pages). + EXPECT_THAT(pwritev(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(2 * kPageSize)); +} + +// Test that partial writes that hit SIGBUS are correctly handled and return +// partial write. +TEST_F(WriteTest, PartialWriteSIGBUS) { + SKIP_IF(getenv("GVISOR_GOFER_UNCACHED")); // Can't mmap from uncached files. + + TempPath mapfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd_map = + ASSERT_NO_ERRNO_AND_VALUE(Open(mapfile.path().c_str(), O_RDWR)); + + // Let the first page be read to force a partial write. + ASSERT_THAT(ftruncate(fd_map.get(), kPageSize), SyscallSucceeds()); + + // Map 2 pages, one of which is not allocated in the backing file. Reading + // from it will trigger a SIGBUS. + const size_t size = 2 * kPageSize; + void* addr = + mmap(NULL, size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd_map.get(), 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_WRONLY)); + + // Attempt to write both pages to the file. Create a non-contiguous iovec pair + // to ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + // Write should succeed for the first iovec and half of the second (=2 pages). + ASSERT_THAT(pwritev(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(2 * kPageSize)); +} + } // namespace } // namespace testing