diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index f251df0d1..51ece5ed0 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -42,14 +42,14 @@ type Inotify struct { // user, since we may aggressively reuse an id on S/R. id uint64 - // evMu *only* protects the event queue. We need a separate lock because + waiter.Queue `state:"nosave"` + + // evMu *only* protects the events list. We need a separate lock because // while queuing events, a watch needs to lock the event queue, and using mu // for that would violate lock ordering since at that point the calling // goroutine already holds Watch.target.Watches.mu. evMu sync.Mutex `state:"nosave"` - waiter.Queue `state:"nosave"` - // A list of pending events for this inotify instance. Protected by evMu. events ilist.List @@ -212,7 +212,6 @@ func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArg func (i *Inotify) queueEvent(ev *Event) { i.evMu.Lock() - defer i.evMu.Unlock() // Check if we should coalesce the event we're about to queue with the last // one currently in the queue. Events are coalesced if they are identical. @@ -221,11 +220,17 @@ func (i *Inotify) queueEvent(ev *Event) { // "Coalesce" the two events by simply not queuing the new one. We // don't need to raise a waiter.EventIn notification because no new // data is available for reading. + i.evMu.Unlock() return } } i.events.PushBack(ev) + + // Release mutex before notifying waiters because we don't control what they + // can do. + i.evMu.Unlock() + i.Queue.Notify(waiter.EventIn) } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0f9b406d8..ae33d14da 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -496,6 +496,7 @@ cc_binary( srcs = ["epoll.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:posix_error", "//test/util:test_main", @@ -844,6 +845,7 @@ cc_binary( srcs = ["inotify.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:fs_util", "//test/util:temp_path", @@ -852,6 +854,7 @@ cc_binary( "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", ], ) diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index 9ae87c00b..46fba7b2d 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -25,6 +25,7 @@ #include #include "gtest/gtest.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" @@ -37,18 +38,6 @@ namespace { constexpr int kFDsPerEpoll = 3; constexpr uint64_t kMagicConstant = 0x0102030405060708; -// Returns a new epoll file descriptor. -PosixErrorOr NewEpollFD() { - // "Since Linux 2.6.8, the size argument is ignored, but must be greater than - // zero." - epoll_create(2) - int fd = epoll_create(/* size = */ 1); - MaybeSave(); - if (fd < 0) { - return PosixError(errno, "epoll_create"); - } - return FileDescriptor(fd); -} - // Returns a new eventfd. PosixErrorOr NewEventFD() { int fd = eventfd(/* initval = */ 0, /* flags = */ 0); @@ -59,22 +48,6 @@ PosixErrorOr NewEventFD() { return FileDescriptor(fd); } -// Registers `target_fd` with the epoll instance represented by `epoll_fd` for -// the epoll events `events`. Events on `target_fd` will be indicated by setting -// data.u64 to `data` in the returned epoll_event. -PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, - uint64_t data) { - struct epoll_event event; - event.events = events; - event.data.u64 = data; - int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); - MaybeSave(); - if (rc < 0) { - return PosixError(errno, "epoll_ctl"); - } - return NoError(); -} - uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) { return (end->tv_sec - begin->tv_sec) * 1000 + (end->tv_nsec - begin->tv_nsec) / 1000000; diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 0e361496c..167ca44a8 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -14,9 +14,12 @@ #include #include +#include +#include #include #include +#include #include #include #include @@ -24,6 +27,9 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/temp_path.h" @@ -1512,6 +1518,55 @@ TEST(Inotify, ControlEvents) { ASSERT_THAT(events2, Are({})); } +// Regression test to ensure epoll and directory access doesn't deadlock. +TEST(Inotify, EpollNoDeadlock) { + const DisableSave ds; // Too many syscalls. + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Create lots of directories and watch all of them. + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::vector children; + for (size_t i = 0; i < 1000; ++i) { + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ACCESS)); + children.emplace_back(std::move(child)); + } + + // Run epoll_wait constantly in a separate thread. + std::atomic done(false); + ScopedThread th([&fd, &done] { + for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) { + FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(), + EPOLLIN | EPOLLOUT | EPOLLET, 0)); + struct epoll_event result[1]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1), + SyscallSucceedsWithValue(1)); + + sched_yield(); + } + done = true; + }); + + // While epoll thread is running, constantly access all directories to + // generate inotify events. + while (!done) { + std::vector files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + ASSERT_EQ(files.size(), 1002); + for (const auto& child : files) { + if (child == "." || child == "..") { + continue; + } + ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false)); + } + sched_yield(); + } +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/util/BUILD b/test/util/BUILD index f981a8d1d..10507eae4 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -259,3 +259,16 @@ cc_library( srcs = ["test_main.cc"], deps = [":test_util"], ) + +cc_library( + name = "epoll_util", + testonly = 1, + srcs = ["epoll_util.cc"], + hdrs = ["epoll_util.h"], + deps = [ + ":file_descriptor", + ":posix_error", + ":save_util", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/util/epoll_util.cc b/test/util/epoll_util.cc new file mode 100644 index 000000000..0b95aa8cd --- /dev/null +++ b/test/util/epoll_util.cc @@ -0,0 +1,52 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/util/epoll_util.h" + +#include + +#include "gmock/gmock.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr NewEpollFD(int size) { + // "Since Linux 2.6.8, the size argument is ignored, but must be greater than + // zero." - epoll_create(2) + int fd = epoll_create(size); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "epoll_create"); + } + return FileDescriptor(fd); +} + +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data) { + struct epoll_event event; + event.events = events; + event.data.u64 = data; + int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "epoll_ctl"); + } + return NoError(); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/util/epoll_util.h b/test/util/epoll_util.h new file mode 100644 index 000000000..521e7a3d3 --- /dev/null +++ b/test/util/epoll_util.h @@ -0,0 +1,36 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_EPOLL_UTIL_H_ +#define GVISOR_TEST_UTIL_EPOLL_UTIL_H_ + +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +// Returns a new epoll file descriptor. +PosixErrorOr NewEpollFD(int size = 1); + +// Registers `target_fd` with the epoll instance represented by `epoll_fd` for +// the epoll events `events`. Events on `target_fd` will be indicated by setting +// data.u64 to `data` in the returned epoll_event. +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_EPOLL_UTIL_H_