diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 5bd526b73..efec93f73 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -75,17 +75,25 @@ func handleIOError(ctx context.Context, partialResult bool, ioerr, intr error, o // errors, we may consume the error and return only the partial read/write. // // Returns false if error is unknown. -func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, op string) (bool, error) { - switch err { - case nil: +func handleIOErrorImpl(ctx context.Context, partialResult bool, errOrig, intr error, op string) (bool, error) { + if errOrig == nil { // Typical successful syscall. return true, nil + } + + // Translate error, if possible, to consolidate errors from other packages + // into a smaller set of errors from syserror package. + translatedErr := errOrig + if errno, ok := syserror.TranslateError(errOrig); ok { + translatedErr = errno + } + switch translatedErr { case io.EOF: // EOF is always consumed. If this is a partial read/write // (result != 0), the application will see that, otherwise // they will see 0. return true, nil - case syserror.ErrExceedsFileSizeLimit: + case syserror.EFBIG: t := kernel.TaskFromContext(ctx) if t == nil { panic("I/O error should only occur from a context associated with a Task") @@ -98,7 +106,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, // Simultaneously send a SIGXFSZ per setrlimit(2). t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t)) return true, syserror.EFBIG - case syserror.ErrInterrupted: + case syserror.EINTR: // The syscall was interrupted. Return nil if it completed // partially, otherwise return the error code that the syscall // needs (to indicate to the kernel what it should do). @@ -110,10 +118,10 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, if !partialResult { // Typical syscall error. - return true, err + return true, errOrig } - switch err { + switch translatedErr { case syserror.EINTR: // Syscall interrupted, but completed a partial // read/write. Like ErrWouldBlock, since we have a @@ -143,7 +151,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, // For TCP sendfile connections, we may have a reset or timeout. But we // should just return n as the result. return true, nil - case syserror.ErrWouldBlock: + case syserror.EWOULDBLOCK: // Syscall would block, but completed a partial read/write. // This case should only be returned by IssueIO for nonblocking // files. Since we have a partial read/write, we consume @@ -151,7 +159,7 @@ func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, return true, nil } - switch err.(type) { + switch errOrig.(type) { case syserror.SyscallRestartErrno: // Identical to the EINTR case. return true, nil diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go index 97de17afe..56b621357 100644 --- a/pkg/syserror/syserror.go +++ b/pkg/syserror/syserror.go @@ -130,17 +130,15 @@ func AddErrorUnwrapper(unwrap func(e error) (unix.Errno, bool)) { // TranslateError translates errors to errnos, it will return false if // the error was not registered. func TranslateError(from error) (unix.Errno, bool) { - err, ok := errorMap[from] - if ok { - return err, ok + if err, ok := errorMap[from]; ok { + return err, true } // Try to unwrap the error if we couldn't match an error // exactly. This might mean that a package has its own // error type. for _, unwrap := range errorUnwrappers { - err, ok := unwrap(from) - if ok { - return err, ok + if err, ok := unwrap(from); ok { + return err, true } } return 0, false diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 037181f7e..043ada583 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1922,7 +1922,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", gtest, + "//test/util:cleanup", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", @@ -3991,6 +3993,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/base:core_headers", gtest, "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc index 98d5e432d..087262535 100644 --- a/test/syscalls/linux/read.cc +++ b/test/syscalls/linux/read.cc @@ -13,11 +13,14 @@ // limitations under the License. #include +#include #include #include #include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -121,6 +124,43 @@ TEST_F(ReadTest, ReadWithOpath) { EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallFailsWithErrno(EBADF)); } +// Test that partial writes that hit SIGSEGV are correctly handled and return +// partial write. +TEST_F(ReadTest, PartialReadSIGSEGV) { + // Allocate 2 pages and remove permission from the second. + const size_t size = 2 * kPageSize; + void* addr = + mmap(0, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(name_.c_str(), O_RDWR, 0666)); + for (size_t i = 0; i < 2; i++) { + EXPECT_THAT(pwrite(fd.get(), addr, size, 0), + SyscallSucceedsWithValue(size)); + } + + void* badAddr = reinterpret_cast(addr) + kPageSize; + ASSERT_THAT(mprotect(badAddr, kPageSize, PROT_NONE), SyscallSucceeds()); + + // Attempt to read to both pages. Create a non-contiguous iovec pair to + // ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + EXPECT_THAT(preadv(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(size)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc index 740992d0a..3373ba72b 100644 --- a/test/syscalls/linux/write.cc +++ b/test/syscalls/linux/write.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/macros.h" #include "test/util/cleanup.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -256,6 +258,82 @@ TEST_F(WriteTest, PwriteWithOpath) { SyscallFailsWithErrno(EBADF)); } +// Test that partial writes that hit SIGSEGV are correctly handled and return +// partial write. +TEST_F(WriteTest, PartialWriteSIGSEGV) { + // Allocate 2 pages and remove permission from the second. + const size_t size = 2 * kPageSize; + void* addr = mmap(0, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + void* badAddr = reinterpret_cast(addr) + kPageSize; + ASSERT_THAT(mprotect(badAddr, kPageSize, PROT_NONE), SyscallSucceeds()); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_WRONLY)); + + // Attempt to write both pages to the file. Create a non-contiguous iovec pair + // to ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + // Write should succeed for the first iovec and half of the second (=2 pages). + EXPECT_THAT(pwritev(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(2 * kPageSize)); +} + +// Test that partial writes that hit SIGBUS are correctly handled and return +// partial write. +TEST_F(WriteTest, PartialWriteSIGBUS) { + SKIP_IF(getenv("GVISOR_GOFER_UNCACHED")); // Can't mmap from uncached files. + + TempPath mapfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd_map = + ASSERT_NO_ERRNO_AND_VALUE(Open(mapfile.path().c_str(), O_RDWR)); + + // Let the first page be read to force a partial write. + ASSERT_THAT(ftruncate(fd_map.get(), kPageSize), SyscallSucceeds()); + + // Map 2 pages, one of which is not allocated in the backing file. Reading + // from it will trigger a SIGBUS. + const size_t size = 2 * kPageSize; + void* addr = + mmap(NULL, size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd_map.get(), 0); + ASSERT_NE(addr, MAP_FAILED); + auto cleanup = Cleanup( + [addr, size] { EXPECT_THAT(munmap(addr, size), SyscallSucceeds()); }); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_WRONLY)); + + // Attempt to write both pages to the file. Create a non-contiguous iovec pair + // to ensure operation is done in 2 steps. + struct iovec iov[] = { + { + .iov_base = addr, + .iov_len = kPageSize, + }, + { + .iov_base = addr, + .iov_len = size, + }, + }; + // Write should succeed for the first iovec and half of the second (=2 pages). + ASSERT_THAT(pwritev(fd.get(), iov, ABSL_ARRAYSIZE(iov), 0), + SyscallSucceedsWithValue(2 * kPageSize)); +} + } // namespace } // namespace testing