From 53eeb06ef14915eee799e9d7d59603ed2a0fe1c1 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 6 Nov 2020 12:53:49 -0800 Subject: [PATCH] Fix infinite loop when splicing to pipes/eventfds. Writes to pipes of size < PIPE_BUF are guaranteed to be atomic, so writes larger than that will return EAGAIN if the pipe has capacity < PIPE_BUF. Writes to eventfds will return EAGAIN if the write would cause the eventfd value to go over the max. In both such cases, calling Ready() on the FD will return true (because it is possible to write), but specific kinds of writes will in fact return EAGAIN. This CL fixes an infinite loop in splice and sendfile (VFS1 and VFS2) by forcing skipping the readiness check for the outfile in send, splice, and tee. PiperOrigin-RevId: 341102260 --- pkg/sentry/syscalls/linux/sys_splice.go | 30 +++++++------ pkg/sentry/syscalls/linux/vfs2/splice.go | 21 +++++---- test/syscalls/linux/BUILD | 5 +++ test/syscalls/linux/sendfile.cc | 53 +++++++++++++++++++++++ test/syscalls/linux/splice.cc | 55 ++++++++++++++++++++++++ test/syscalls/linux/timers.cc | 5 --- test/util/timer_util.h | 5 +++ 7 files changed, 146 insertions(+), 28 deletions(-) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 46616c961..1c4cdb0dd 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -41,6 +41,7 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB inCh chan struct{} outCh chan struct{} ) + for opts.Length > 0 { n, err = fs.Splice(t, outFile, inFile, opts) opts.Length -= n @@ -61,23 +62,28 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB inW, _ := waiter.NewChannelEntry(inCh) inFile.EventRegister(&inW, EventMaskRead) defer inFile.EventUnregister(&inW) - continue // Need to refresh readiness. + // Need to refresh readiness. + continue } if err = t.Block(inCh); err != nil { break } } - if outFile.Readiness(EventMaskWrite) == 0 { - if outCh == nil { - outCh = make(chan struct{}, 1) - outW, _ := waiter.NewChannelEntry(outCh) - outFile.EventRegister(&outW, EventMaskWrite) - defer outFile.EventUnregister(&outW) - continue // Need to refresh readiness. - } - if err = t.Block(outCh); err != nil { - break - } + // Don't bother checking readiness of the outFile, because it's not a + // guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds + // can be "ready" but will reject writes of certain sizes with + // EWOULDBLOCK. + if outCh == nil { + outCh = make(chan struct{}, 1) + outW, _ := waiter.NewChannelEntry(outCh) + outFile.EventRegister(&outW, EventMaskWrite) + defer outFile.EventUnregister(&outW) + // We might be ready to write now. Try again before + // blocking. + continue + } + if err = t.Block(outCh); err != nil { + break } } diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 035e2a6b0..9ce4f280a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -480,18 +480,17 @@ func (dw *dualWaiter) waitForBoth(t *kernel.Task) error { // waitForOut waits for dw.outfile to be read. func (dw *dualWaiter) waitForOut(t *kernel.Task) error { - if dw.outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { - if dw.outCh == nil { - dw.outW, dw.outCh = waiter.NewChannelEntry(nil) - dw.outFile.EventRegister(&dw.outW, eventMaskWrite) - // We might be ready now. Try again before blocking. - return nil - } - if err := t.Block(dw.outCh); err != nil { - return err - } + // Don't bother checking readiness of the outFile, because it's not a + // guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds + // can be "ready" but will reject writes of certain sizes with + // EWOULDBLOCK. See b/172075629, b/170743336. + if dw.outCh == nil { + dw.outW, dw.outCh = waiter.NewChannelEntry(nil) + dw.outFile.EventRegister(&dw.outW, eventMaskWrite) + // We might be ready to write now. Try again before blocking. + return nil } - return nil + return t.Block(dw.outCh) } // destroy cleans up resources help by dw. No more calls to wait* can occur diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 99f286bfc..2350f7e69 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2106,10 +2106,12 @@ cc_binary( "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:signal_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) @@ -2129,6 +2131,7 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) @@ -2142,10 +2145,12 @@ cc_binary( "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:signal_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index a8bfb01f1..cf0977118 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -25,9 +25,11 @@ #include "absl/time/time.h" #include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" +#include "test/util/timer_util.h" namespace gvisor { namespace testing { @@ -629,6 +631,57 @@ TEST(SendFileTest, SendFileToPipe) { SyscallSucceedsWithValue(kDataSize)); } +static volatile int signaled = 0; +void SigUsr1Handler(int sig, siginfo_t* info, void* context) { signaled = 1; } + +TEST(SendFileTest, ToEventFDDoesNotSpin_NoRandomSave) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + + // Write the maximum value of an eventfd to a file. + const uint64_t kMaxEventfdValue = 0xfffffffffffffffe; + const auto tempfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto tempfd = ASSERT_NO_ERRNO_AND_VALUE(Open(tempfile.path(), O_RDWR)); + ASSERT_THAT( + pwrite(tempfd.get(), &kMaxEventfdValue, sizeof(kMaxEventfdValue), 0), + SyscallSucceedsWithValue(sizeof(kMaxEventfdValue))); + + // Set the eventfd's value to 1. + const uint64_t kOne = 1; + ASSERT_THAT(write(efd.get(), &kOne, sizeof(kOne)), + SyscallSucceedsWithValue(sizeof(kOne))); + + // Set up signal handler. + struct sigaction sa = {}; + sa.sa_sigaction = SigUsr1Handler; + sa.sa_flags = SA_SIGINFO; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send SIGUSR1 to this thread in 1 second. + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = SIGUSR1; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(absl::Seconds(1)); + DisableSave ds; // Asserting an EINTR. + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // Sendfile from tempfd to the eventfd. Since the eventfd is not already at + // its maximum value, the eventfd is "ready for writing"; however, since the + // eventfd's existing value plus the new value would exceed the maximum, the + // write should internally fail with EWOULDBLOCK. In this case, sendfile() + // should block instead of spinning, and eventually be interrupted by our + // timer. See b/172075629. + EXPECT_THAT( + sendfile(efd.get(), tempfd.get(), nullptr, sizeof(kMaxEventfdValue)), + SyscallFailsWithErrno(EINTR)); + + // Signal should have been handled. + EXPECT_EQ(signaled, 1); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index a1d2b9b11..c2369db54 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -26,9 +26,11 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" +#include "test/util/timer_util.h" namespace gvisor { namespace testing { @@ -772,6 +774,59 @@ TEST(SpliceTest, FromPipeToDevZero) { SyscallSucceedsWithValue(0)); } +static volatile int signaled = 0; +void SigUsr1Handler(int sig, siginfo_t* info, void* context) { signaled = 1; } + +TEST(SpliceTest, ToPipeWithSmallCapacityDoesNotSpin_NoRandomSave) { + // Writes to a pipe that are less than PIPE_BUF must be atomic. This test + // creates a pipe with only 128 bytes of capacity (< PIPE_BUF) and checks that + // splicing to the pipe does not spin. See b/170743336. + + // Create a file with one page of data. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(buf.data(), buf.size()), + TempPath::kDefaultFileMode)); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Create a pipe with size 4096, and fill all but 128 bytes of it. + int p[2]; + ASSERT_THAT(pipe(p), SyscallSucceeds()); + ASSERT_THAT(fcntl(p[1], F_SETPIPE_SZ, kPageSize), SyscallSucceeds()); + const int kWriteSize = kPageSize - 128; + std::vector writeBuf(kWriteSize); + RandomizeBuffer(writeBuf.data(), writeBuf.size()); + ASSERT_THAT(write(p[1], writeBuf.data(), writeBuf.size()), + SyscallSucceedsWithValue(kWriteSize)); + + // Set up signal handler. + struct sigaction sa = {}; + sa.sa_sigaction = SigUsr1Handler; + sa.sa_flags = SA_SIGINFO; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send SIGUSR1 to this thread in 1 second. + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = SIGUSR1; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(absl::Seconds(1)); + DisableSave ds; // Asserting an EINTR. + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // Now splice the file to the pipe. This should block, but not spin, and + // should return EINTR because it is interrupted by the signal. + EXPECT_THAT(splice(fd.get(), nullptr, p[1], nullptr, kPageSize, 0), + SyscallFailsWithErrno(EINTR)); + + // Alarm should have been handled. + EXPECT_EQ(signaled, 1); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc index cac94d9e1..93a98adb1 100644 --- a/test/syscalls/linux/timers.cc +++ b/test/syscalls/linux/timers.cc @@ -322,11 +322,6 @@ TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) { EXPECT_GE(counted_signals.load(), kCycles); } -// From Linux's include/uapi/asm-generic/siginfo.h. -#ifndef sigev_notify_thread_id -#define sigev_notify_thread_id _sigev_un._tid -#endif - TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) { constexpr int kSigno = SIGUSR1; constexpr int kSigvalue = 42; diff --git a/test/util/timer_util.h b/test/util/timer_util.h index 926e6632f..e389108ef 100644 --- a/test/util/timer_util.h +++ b/test/util/timer_util.h @@ -33,6 +33,11 @@ namespace gvisor { namespace testing { +// From Linux's include/uapi/asm-generic/siginfo.h. +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + // Returns the current time. absl::Time Now(clockid_t id);