From 46e6577014c849d7306c63905db25f3c695fa7e7 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 27 Dec 2018 14:58:41 -0800 Subject: [PATCH] Fix deadlock between epoll_wait and getdents epoll_wait acquires EventPoll.listsMu (in EventPoll.ReadEvents) and then calls Inotify.Readiness which tries to acquire Inotify.evMu. getdents acquires Inotify.evMu (in Inotify.queueEvent) and then calls readyCallback.Callback which tries to acquire EventPoll.listsMu. The fix is to release Inotify.evMu before calling Queue.Notify. Queue is thread-safe and doesn't require Inotify.evMu to be held. Closes #121 PiperOrigin-RevId: 227066695 Change-Id: Id29364bb940d1727f33a5dff9a3c52f390c15761 --- pkg/sentry/fs/inotify.go | 13 +++++--- test/syscalls/linux/BUILD | 3 ++ test/syscalls/linux/epoll.cc | 29 +----------------- test/syscalls/linux/inotify.cc | 55 ++++++++++++++++++++++++++++++++++ test/util/BUILD | 13 ++++++++ test/util/epoll_util.cc | 52 ++++++++++++++++++++++++++++++++ test/util/epoll_util.h | 36 ++++++++++++++++++++++ 7 files changed, 169 insertions(+), 32 deletions(-) create mode 100644 test/util/epoll_util.cc create mode 100644 test/util/epoll_util.h diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index f251df0d1..51ece5ed0 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -42,14 +42,14 @@ type Inotify struct { // user, since we may aggressively reuse an id on S/R. id uint64 - // evMu *only* protects the event queue. We need a separate lock because + waiter.Queue `state:"nosave"` + + // evMu *only* protects the events list. We need a separate lock because // while queuing events, a watch needs to lock the event queue, and using mu // for that would violate lock ordering since at that point the calling // goroutine already holds Watch.target.Watches.mu. evMu sync.Mutex `state:"nosave"` - waiter.Queue `state:"nosave"` - // A list of pending events for this inotify instance. Protected by evMu. events ilist.List @@ -212,7 +212,6 @@ func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArg func (i *Inotify) queueEvent(ev *Event) { i.evMu.Lock() - defer i.evMu.Unlock() // Check if we should coalesce the event we're about to queue with the last // one currently in the queue. Events are coalesced if they are identical. @@ -221,11 +220,17 @@ func (i *Inotify) queueEvent(ev *Event) { // "Coalesce" the two events by simply not queuing the new one. We // don't need to raise a waiter.EventIn notification because no new // data is available for reading. + i.evMu.Unlock() return } } i.events.PushBack(ev) + + // Release mutex before notifying waiters because we don't control what they + // can do. + i.evMu.Unlock() + i.Queue.Notify(waiter.EventIn) } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0f9b406d8..ae33d14da 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -496,6 +496,7 @@ cc_binary( srcs = ["epoll.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:posix_error", "//test/util:test_main", @@ -844,6 +845,7 @@ cc_binary( srcs = ["inotify.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:fs_util", "//test/util:temp_path", @@ -852,6 +854,7 @@ cc_binary( "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", ], ) diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index 9ae87c00b..46fba7b2d 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -25,6 +25,7 @@ #include #include "gtest/gtest.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" @@ -37,18 +38,6 @@ namespace { constexpr int kFDsPerEpoll = 3; constexpr uint64_t kMagicConstant = 0x0102030405060708; -// Returns a new epoll file descriptor. -PosixErrorOr NewEpollFD() { - // "Since Linux 2.6.8, the size argument is ignored, but must be greater than - // zero." - epoll_create(2) - int fd = epoll_create(/* size = */ 1); - MaybeSave(); - if (fd < 0) { - return PosixError(errno, "epoll_create"); - } - return FileDescriptor(fd); -} - // Returns a new eventfd. PosixErrorOr NewEventFD() { int fd = eventfd(/* initval = */ 0, /* flags = */ 0); @@ -59,22 +48,6 @@ PosixErrorOr NewEventFD() { return FileDescriptor(fd); } -// Registers `target_fd` with the epoll instance represented by `epoll_fd` for -// the epoll events `events`. Events on `target_fd` will be indicated by setting -// data.u64 to `data` in the returned epoll_event. -PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, - uint64_t data) { - struct epoll_event event; - event.events = events; - event.data.u64 = data; - int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); - MaybeSave(); - if (rc < 0) { - return PosixError(errno, "epoll_ctl"); - } - return NoError(); -} - uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) { return (end->tv_sec - begin->tv_sec) * 1000 + (end->tv_nsec - begin->tv_nsec) / 1000000; diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 0e361496c..167ca44a8 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -14,9 +14,12 @@ #include #include +#include +#include #include #include +#include #include #include #include @@ -24,6 +27,9 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/temp_path.h" @@ -1512,6 +1518,55 @@ TEST(Inotify, ControlEvents) { ASSERT_THAT(events2, Are({})); } +// Regression test to ensure epoll and directory access doesn't deadlock. +TEST(Inotify, EpollNoDeadlock) { + const DisableSave ds; // Too many syscalls. + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Create lots of directories and watch all of them. + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::vector children; + for (size_t i = 0; i < 1000; ++i) { + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ACCESS)); + children.emplace_back(std::move(child)); + } + + // Run epoll_wait constantly in a separate thread. + std::atomic done(false); + ScopedThread th([&fd, &done] { + for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) { + FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(), + EPOLLIN | EPOLLOUT | EPOLLET, 0)); + struct epoll_event result[1]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1), + SyscallSucceedsWithValue(1)); + + sched_yield(); + } + done = true; + }); + + // While epoll thread is running, constantly access all directories to + // generate inotify events. + while (!done) { + std::vector files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + ASSERT_EQ(files.size(), 1002); + for (const auto& child : files) { + if (child == "." || child == "..") { + continue; + } + ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false)); + } + sched_yield(); + } +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/util/BUILD b/test/util/BUILD index f981a8d1d..10507eae4 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -259,3 +259,16 @@ cc_library( srcs = ["test_main.cc"], deps = [":test_util"], ) + +cc_library( + name = "epoll_util", + testonly = 1, + srcs = ["epoll_util.cc"], + hdrs = ["epoll_util.h"], + deps = [ + ":file_descriptor", + ":posix_error", + ":save_util", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/util/epoll_util.cc b/test/util/epoll_util.cc new file mode 100644 index 000000000..0b95aa8cd --- /dev/null +++ b/test/util/epoll_util.cc @@ -0,0 +1,52 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/util/epoll_util.h" + +#include + +#include "gmock/gmock.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr NewEpollFD(int size) { + // "Since Linux 2.6.8, the size argument is ignored, but must be greater than + // zero." - epoll_create(2) + int fd = epoll_create(size); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "epoll_create"); + } + return FileDescriptor(fd); +} + +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data) { + struct epoll_event event; + event.events = events; + event.data.u64 = data; + int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "epoll_ctl"); + } + return NoError(); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/util/epoll_util.h b/test/util/epoll_util.h new file mode 100644 index 000000000..521e7a3d3 --- /dev/null +++ b/test/util/epoll_util.h @@ -0,0 +1,36 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_EPOLL_UTIL_H_ +#define GVISOR_TEST_UTIL_EPOLL_UTIL_H_ + +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +// Returns a new epoll file descriptor. +PosixErrorOr NewEpollFD(int size = 1); + +// Registers `target_fd` with the epoll instance represented by `epoll_fd` for +// the epoll events `events`. Events on `target_fd` will be indicated by setting +// data.u64 to `data` in the returned epoll_event. +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_EPOLL_UTIL_H_