Fix deadlock between epoll_wait and getdents

epoll_wait acquires EventPoll.listsMu (in EventPoll.ReadEvents) and
then calls Inotify.Readiness which tries to acquire Inotify.evMu.

getdents acquires Inotify.evMu (in Inotify.queueEvent) and then calls
readyCallback.Callback which tries to acquire EventPoll.listsMu.

The fix is to release Inotify.evMu before calling Queue.Notify. Queue
is thread-safe and doesn't require Inotify.evMu to be held.

Closes #121

PiperOrigin-RevId: 227066695
Change-Id: Id29364bb940d1727f33a5dff9a3c52f390c15761
This commit is contained in:
Fabricio Voznika 2018-12-27 14:58:41 -08:00 committed by Shentubot
parent bce2f9751f
commit 46e6577014
7 changed files with 169 additions and 32 deletions

View File

@ -42,14 +42,14 @@ type Inotify struct {
// user, since we may aggressively reuse an id on S/R.
id uint64
// evMu *only* protects the event queue. We need a separate lock because
waiter.Queue `state:"nosave"`
// evMu *only* protects the events list. We need a separate lock because
// while queuing events, a watch needs to lock the event queue, and using mu
// for that would violate lock ordering since at that point the calling
// goroutine already holds Watch.target.Watches.mu.
evMu sync.Mutex `state:"nosave"`
waiter.Queue `state:"nosave"`
// A list of pending events for this inotify instance. Protected by evMu.
events ilist.List
@ -212,7 +212,6 @@ func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArg
func (i *Inotify) queueEvent(ev *Event) {
i.evMu.Lock()
defer i.evMu.Unlock()
// Check if we should coalesce the event we're about to queue with the last
// one currently in the queue. Events are coalesced if they are identical.
@ -221,11 +220,17 @@ func (i *Inotify) queueEvent(ev *Event) {
// "Coalesce" the two events by simply not queuing the new one. We
// don't need to raise a waiter.EventIn notification because no new
// data is available for reading.
i.evMu.Unlock()
return
}
}
i.events.PushBack(ev)
// Release mutex before notifying waiters because we don't control what they
// can do.
i.evMu.Unlock()
i.Queue.Notify(waiter.EventIn)
}

View File

@ -496,6 +496,7 @@ cc_binary(
srcs = ["epoll.cc"],
linkstatic = 1,
deps = [
"//test/util:epoll_util",
"//test/util:file_descriptor",
"//test/util:posix_error",
"//test/util:test_main",
@ -844,6 +845,7 @@ cc_binary(
srcs = ["inotify.cc"],
linkstatic = 1,
deps = [
"//test/util:epoll_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
"//test/util:temp_path",
@ -852,6 +854,7 @@ cc_binary(
"//test/util:thread_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/time",
],
)

View File

@ -25,6 +25,7 @@
#include <unistd.h>
#include "gtest/gtest.h"
#include "test/util/epoll_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
#include "test/util/test_util.h"
@ -37,18 +38,6 @@ namespace {
constexpr int kFDsPerEpoll = 3;
constexpr uint64_t kMagicConstant = 0x0102030405060708;
// Returns a new epoll file descriptor.
PosixErrorOr<FileDescriptor> NewEpollFD() {
// "Since Linux 2.6.8, the size argument is ignored, but must be greater than
// zero." - epoll_create(2)
int fd = epoll_create(/* size = */ 1);
MaybeSave();
if (fd < 0) {
return PosixError(errno, "epoll_create");
}
return FileDescriptor(fd);
}
// Returns a new eventfd.
PosixErrorOr<FileDescriptor> NewEventFD() {
int fd = eventfd(/* initval = */ 0, /* flags = */ 0);
@ -59,22 +48,6 @@ PosixErrorOr<FileDescriptor> NewEventFD() {
return FileDescriptor(fd);
}
// Registers `target_fd` with the epoll instance represented by `epoll_fd` for
// the epoll events `events`. Events on `target_fd` will be indicated by setting
// data.u64 to `data` in the returned epoll_event.
PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events,
uint64_t data) {
struct epoll_event event;
event.events = events;
event.data.u64 = data;
int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event);
MaybeSave();
if (rc < 0) {
return PosixError(errno, "epoll_ctl");
}
return NoError();
}
uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) {
return (end->tv_sec - begin->tv_sec) * 1000 +
(end->tv_nsec - begin->tv_nsec) / 1000000;

View File

@ -14,9 +14,12 @@
#include <fcntl.h>
#include <libgen.h>
#include <sched.h>
#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/ioctl.h>
#include <atomic>
#include <list>
#include <string>
#include <vector>
@ -24,6 +27,9 @@
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "test/util/epoll_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
#include "test/util/temp_path.h"
@ -1512,6 +1518,55 @@ TEST(Inotify, ControlEvents) {
ASSERT_THAT(events2, Are({}));
}
// Regression test to ensure epoll and directory access doesn't deadlock.
TEST(Inotify, EpollNoDeadlock) {
const DisableSave ds; // Too many syscalls.
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
// Create lots of directories and watch all of them.
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
std::vector<TempPath> children;
for (size_t i = 0; i < 1000; ++i) {
auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
ASSERT_NO_ERRNO_AND_VALUE(
InotifyAddWatch(fd.get(), child.path(), IN_ACCESS));
children.emplace_back(std::move(child));
}
// Run epoll_wait constantly in a separate thread.
std::atomic<bool> done(false);
ScopedThread th([&fd, &done] {
for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) {
FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(),
EPOLLIN | EPOLLOUT | EPOLLET, 0));
struct epoll_event result[1];
EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1),
SyscallSucceedsWithValue(1));
sched_yield();
}
done = true;
});
// While epoll thread is running, constantly access all directories to
// generate inotify events.
while (!done) {
std::vector<std::string> files =
ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false));
ASSERT_EQ(files.size(), 1002);
for (const auto& child : files) {
if (child == "." || child == "..") {
continue;
}
ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false));
}
sched_yield();
}
}
} // namespace
} // namespace testing
} // namespace gvisor

View File

@ -259,3 +259,16 @@ cc_library(
srcs = ["test_main.cc"],
deps = [":test_util"],
)
cc_library(
name = "epoll_util",
testonly = 1,
srcs = ["epoll_util.cc"],
hdrs = ["epoll_util.h"],
deps = [
":file_descriptor",
":posix_error",
":save_util",
"@com_google_googletest//:gtest",
],
)

52
test/util/epoll_util.cc Normal file
View File

@ -0,0 +1,52 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test/util/epoll_util.h"
#include <sys/epoll.h>
#include "gmock/gmock.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
#include "test/util/save_util.h"
namespace gvisor {
namespace testing {
PosixErrorOr<FileDescriptor> NewEpollFD(int size) {
// "Since Linux 2.6.8, the size argument is ignored, but must be greater than
// zero." - epoll_create(2)
int fd = epoll_create(size);
MaybeSave();
if (fd < 0) {
return PosixError(errno, "epoll_create");
}
return FileDescriptor(fd);
}
PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events,
uint64_t data) {
struct epoll_event event;
event.events = events;
event.data.u64 = data;
int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event);
MaybeSave();
if (rc < 0) {
return PosixError(errno, "epoll_ctl");
}
return NoError();
}
} // namespace testing
} // namespace gvisor

36
test/util/epoll_util.h Normal file
View File

@ -0,0 +1,36 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef GVISOR_TEST_UTIL_EPOLL_UTIL_H_
#define GVISOR_TEST_UTIL_EPOLL_UTIL_H_
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
namespace gvisor {
namespace testing {
// Returns a new epoll file descriptor.
PosixErrorOr<FileDescriptor> NewEpollFD(int size = 1);
// Registers `target_fd` with the epoll instance represented by `epoll_fd` for
// the epoll events `events`. Events on `target_fd` will be indicated by setting
// data.u64 to `data` in the returned epoll_event.
PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events,
uint64_t data);
} // namespace testing
} // namespace gvisor
#endif // GVISOR_TEST_UTIL_EPOLL_UTIL_H_