diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 46616c961..1c4cdb0dd 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -41,6 +41,7 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB inCh chan struct{} outCh chan struct{} ) + for opts.Length > 0 { n, err = fs.Splice(t, outFile, inFile, opts) opts.Length -= n @@ -61,23 +62,28 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB inW, _ := waiter.NewChannelEntry(inCh) inFile.EventRegister(&inW, EventMaskRead) defer inFile.EventUnregister(&inW) - continue // Need to refresh readiness. + // Need to refresh readiness. + continue } if err = t.Block(inCh); err != nil { break } } - if outFile.Readiness(EventMaskWrite) == 0 { - if outCh == nil { - outCh = make(chan struct{}, 1) - outW, _ := waiter.NewChannelEntry(outCh) - outFile.EventRegister(&outW, EventMaskWrite) - defer outFile.EventUnregister(&outW) - continue // Need to refresh readiness. - } - if err = t.Block(outCh); err != nil { - break - } + // Don't bother checking readiness of the outFile, because it's not a + // guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds + // can be "ready" but will reject writes of certain sizes with + // EWOULDBLOCK. + if outCh == nil { + outCh = make(chan struct{}, 1) + outW, _ := waiter.NewChannelEntry(outCh) + outFile.EventRegister(&outW, EventMaskWrite) + defer outFile.EventUnregister(&outW) + // We might be ready to write now. Try again before + // blocking. + continue + } + if err = t.Block(outCh); err != nil { + break } } diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 035e2a6b0..9ce4f280a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -480,18 +480,17 @@ func (dw *dualWaiter) waitForBoth(t *kernel.Task) error { // waitForOut waits for dw.outfile to be read. func (dw *dualWaiter) waitForOut(t *kernel.Task) error { - if dw.outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { - if dw.outCh == nil { - dw.outW, dw.outCh = waiter.NewChannelEntry(nil) - dw.outFile.EventRegister(&dw.outW, eventMaskWrite) - // We might be ready now. Try again before blocking. - return nil - } - if err := t.Block(dw.outCh); err != nil { - return err - } + // Don't bother checking readiness of the outFile, because it's not a + // guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds + // can be "ready" but will reject writes of certain sizes with + // EWOULDBLOCK. See b/172075629, b/170743336. + if dw.outCh == nil { + dw.outW, dw.outCh = waiter.NewChannelEntry(nil) + dw.outFile.EventRegister(&dw.outW, eventMaskWrite) + // We might be ready to write now. Try again before blocking. + return nil } - return nil + return t.Block(dw.outCh) } // destroy cleans up resources help by dw. No more calls to wait* can occur diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 99f286bfc..2350f7e69 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2106,10 +2106,12 @@ cc_binary( "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:signal_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) @@ -2129,6 +2131,7 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) @@ -2142,10 +2145,12 @@ cc_binary( "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:signal_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "//test/util:timer_util", ], ) diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index a8bfb01f1..cf0977118 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -25,9 +25,11 @@ #include "absl/time/time.h" #include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" +#include "test/util/timer_util.h" namespace gvisor { namespace testing { @@ -629,6 +631,57 @@ TEST(SendFileTest, SendFileToPipe) { SyscallSucceedsWithValue(kDataSize)); } +static volatile int signaled = 0; +void SigUsr1Handler(int sig, siginfo_t* info, void* context) { signaled = 1; } + +TEST(SendFileTest, ToEventFDDoesNotSpin_NoRandomSave) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + + // Write the maximum value of an eventfd to a file. + const uint64_t kMaxEventfdValue = 0xfffffffffffffffe; + const auto tempfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto tempfd = ASSERT_NO_ERRNO_AND_VALUE(Open(tempfile.path(), O_RDWR)); + ASSERT_THAT( + pwrite(tempfd.get(), &kMaxEventfdValue, sizeof(kMaxEventfdValue), 0), + SyscallSucceedsWithValue(sizeof(kMaxEventfdValue))); + + // Set the eventfd's value to 1. + const uint64_t kOne = 1; + ASSERT_THAT(write(efd.get(), &kOne, sizeof(kOne)), + SyscallSucceedsWithValue(sizeof(kOne))); + + // Set up signal handler. + struct sigaction sa = {}; + sa.sa_sigaction = SigUsr1Handler; + sa.sa_flags = SA_SIGINFO; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send SIGUSR1 to this thread in 1 second. + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = SIGUSR1; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(absl::Seconds(1)); + DisableSave ds; // Asserting an EINTR. + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // Sendfile from tempfd to the eventfd. Since the eventfd is not already at + // its maximum value, the eventfd is "ready for writing"; however, since the + // eventfd's existing value plus the new value would exceed the maximum, the + // write should internally fail with EWOULDBLOCK. In this case, sendfile() + // should block instead of spinning, and eventually be interrupted by our + // timer. See b/172075629. + EXPECT_THAT( + sendfile(efd.get(), tempfd.get(), nullptr, sizeof(kMaxEventfdValue)), + SyscallFailsWithErrno(EINTR)); + + // Signal should have been handled. + EXPECT_EQ(signaled, 1); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index a1d2b9b11..c2369db54 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -26,9 +26,11 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" +#include "test/util/timer_util.h" namespace gvisor { namespace testing { @@ -772,6 +774,59 @@ TEST(SpliceTest, FromPipeToDevZero) { SyscallSucceedsWithValue(0)); } +static volatile int signaled = 0; +void SigUsr1Handler(int sig, siginfo_t* info, void* context) { signaled = 1; } + +TEST(SpliceTest, ToPipeWithSmallCapacityDoesNotSpin_NoRandomSave) { + // Writes to a pipe that are less than PIPE_BUF must be atomic. This test + // creates a pipe with only 128 bytes of capacity (< PIPE_BUF) and checks that + // splicing to the pipe does not spin. See b/170743336. + + // Create a file with one page of data. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(buf.data(), buf.size()), + TempPath::kDefaultFileMode)); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Create a pipe with size 4096, and fill all but 128 bytes of it. + int p[2]; + ASSERT_THAT(pipe(p), SyscallSucceeds()); + ASSERT_THAT(fcntl(p[1], F_SETPIPE_SZ, kPageSize), SyscallSucceeds()); + const int kWriteSize = kPageSize - 128; + std::vector writeBuf(kWriteSize); + RandomizeBuffer(writeBuf.data(), writeBuf.size()); + ASSERT_THAT(write(p[1], writeBuf.data(), writeBuf.size()), + SyscallSucceedsWithValue(kWriteSize)); + + // Set up signal handler. + struct sigaction sa = {}; + sa.sa_sigaction = SigUsr1Handler; + sa.sa_flags = SA_SIGINFO; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send SIGUSR1 to this thread in 1 second. + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = SIGUSR1; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(absl::Seconds(1)); + DisableSave ds; // Asserting an EINTR. + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // Now splice the file to the pipe. This should block, but not spin, and + // should return EINTR because it is interrupted by the signal. + EXPECT_THAT(splice(fd.get(), nullptr, p[1], nullptr, kPageSize, 0), + SyscallFailsWithErrno(EINTR)); + + // Alarm should have been handled. + EXPECT_EQ(signaled, 1); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc index cac94d9e1..93a98adb1 100644 --- a/test/syscalls/linux/timers.cc +++ b/test/syscalls/linux/timers.cc @@ -322,11 +322,6 @@ TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) { EXPECT_GE(counted_signals.load(), kCycles); } -// From Linux's include/uapi/asm-generic/siginfo.h. -#ifndef sigev_notify_thread_id -#define sigev_notify_thread_id _sigev_un._tid -#endif - TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) { constexpr int kSigno = SIGUSR1; constexpr int kSigvalue = 42; diff --git a/test/util/timer_util.h b/test/util/timer_util.h index 926e6632f..e389108ef 100644 --- a/test/util/timer_util.h +++ b/test/util/timer_util.h @@ -33,6 +33,11 @@ namespace gvisor { namespace testing { +// From Linux's include/uapi/asm-generic/siginfo.h. +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + // Returns the current time. absl::Time Now(clockid_t id);