diff --git a/pkg/refsvfs2/refs_map.go b/pkg/refsvfs2/refs_map.go
index 57938d2f0..faf191f39 100644
--- a/pkg/refsvfs2/refs_map.go
+++ b/pkg/refsvfs2/refs_map.go
@@ -16,16 +16,12 @@ package refsvfs2
 
 import (
 	"fmt"
-	"strings"
 
 	"gvisor.dev/gvisor/pkg/log"
 	refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sync"
 )
 
-// TODO(gvisor.dev/issue/1193): re-enable once kernfs refs are fixed.
-var ignored []string = []string{"kernfs.", "proc.", "sys.", "devpts.", "fuse."}
-
 var (
 	// liveObjects is a global map of reference-counted objects. Objects are
 	// inserted when leak check is enabled, and they are removed when they are
@@ -60,11 +56,6 @@ func leakCheckEnabled() bool {
 // Register adds obj to the live object map.
 func Register(obj CheckedObject) {
 	if leakCheckEnabled() {
-		for _, str := range ignored {
-			if strings.Contains(obj.RefType(), str) {
-				return
-			}
-		}
 		liveObjectsMu.Lock()
 		if _, ok := liveObjects[obj]; ok {
 			panic(fmt.Sprintf("Unexpected entry in leak checking map: reference %p already added", obj))
@@ -81,11 +72,6 @@ func Unregister(obj CheckedObject) {
 	liveObjectsMu.Lock()
 	defer liveObjectsMu.Unlock()
 	if _, ok := liveObjects[obj]; !ok {
-		for _, str := range ignored {
-			if strings.Contains(obj.RefType(), str) {
-				return
-			}
-		}
 		panic(fmt.Sprintf("Expected to find entry in leak checking map for reference %p", obj))
 	}
 	delete(liveObjects, obj)
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 9185877f6..346cca558 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -113,7 +113,7 @@ func (fstype *FilesystemType) newFilesystem(ctx context.Context, vfsObj *vfs.Vir
 	root.EnableLeakCheck()
 
 	var rootD kernfs.Dentry
-	rootD.Init(&fs.Filesystem, root)
+	rootD.InitRoot(&fs.Filesystem, root)
 
 	// Construct the pts master inode and dentry. Linux always uses inode
 	// id 2 for ptmx. See fs/devpts/inode.c:mknod_ptmx.
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index e7ef5998e..6de416da0 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -205,7 +205,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 	}
 
 	// root is the fusefs root directory.
-	root := fs.newRootInode(ctx, creds, fsopts.rootMode)
+	root := fs.newRoot(ctx, creds, fsopts.rootMode)
 
 	return fs.VFSFilesystem(), root.VFSDentry(), nil
 }
@@ -284,14 +284,14 @@ type inode struct {
 	link string
 }
 
-func (fs *filesystem) newRootInode(ctx context.Context, creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
+func (fs *filesystem) newRoot(ctx context.Context, creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
 	i := &inode{fs: fs, nodeID: 1}
 	i.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755)
 	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
 	i.EnableLeakCheck()
 
 	var d kernfs.Dentry
-	d.Init(&fs.Filesystem, i)
+	d.InitRoot(&fs.Filesystem, i)
 	return &d
 }
 
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 399895f3e..f81056023 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -245,7 +245,41 @@ func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) er
 }
 
 // Release implements vfs.FilesystemImpl.Release.
-func (fs *Filesystem) Release(context.Context) {
+func (fs *Filesystem) Release(ctx context.Context) {
+	root := fs.root
+	if root == nil {
+		return
+	}
+	fs.mu.Lock()
+	root.releaseKeptDentriesLocked(ctx)
+	for fs.cachedDentriesLen != 0 {
+		fs.evictCachedDentryLocked(ctx)
+	}
+	fs.mu.Unlock()
+	// Drop ref acquired in Dentry.InitRoot().
+	root.DecRef(ctx)
+}
+
+// releaseKeptDentriesLocked recursively drops all dentry references created by
+// Lookup when Dentry.inode.Keep() is true.
+//
+// Precondition: Filesystem.mu is held.
+func (d *Dentry) releaseKeptDentriesLocked(ctx context.Context) {
+	if d.inode.Keep() {
+		d.decRefLocked(ctx)
+	}
+
+	if d.isDir() {
+		var children []*Dentry
+		d.dirMu.Lock()
+		for _, child := range d.children {
+			children = append(children, child)
+		}
+		d.dirMu.Unlock()
+		for _, child := range children {
+			child.releaseKeptDentriesLocked(ctx)
+		}
+	}
 }
 
 // Sync implements vfs.FilesystemImpl.Sync.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 5c5e09ac5..abb477c7d 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -61,6 +61,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/refsvfs2"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -118,6 +119,12 @@ type Filesystem struct {
 	// MaxCachedDentries is the maximum size of cachedDentries. If not set,
 	// defaults to 0 and kernfs does not cache any dentries. This is immutable.
 	MaxCachedDentries uint64
+
+	// root is the root dentry of this filesystem. Note that root may be nil for
+	// filesystems on a disconnected mount without a root (e.g. pipefs, sockfs,
+	// hostfs). Filesystem holds an extra reference on root to prevent it from
+	// being destroyed prematurely. This is immutable.
+	root *Dentry
 }
 
 // deferDecRef defers dropping a dentry ref until the next call to
@@ -214,17 +221,19 @@ type Dentry struct {
 func (d *Dentry) IncRef() {
 	// d.refs may be 0 if d.fs.mu is locked, which serializes against
 	// d.cacheLocked().
-	atomic.AddInt64(&d.refs, 1)
+	r := atomic.AddInt64(&d.refs, 1)
+	refsvfs2.LogIncRef(d, r)
 }
 
 // TryIncRef implements vfs.DentryImpl.TryIncRef.
 func (d *Dentry) TryIncRef() bool {
 	for {
-		refs := atomic.LoadInt64(&d.refs)
-		if refs <= 0 {
+		r := atomic.LoadInt64(&d.refs)
+		if r <= 0 {
 			return false
 		}
-		if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) {
+		if atomic.CompareAndSwapInt64(&d.refs, r, r+1) {
+			refsvfs2.LogTryIncRef(d, r+1)
 			return true
 		}
 	}
@@ -232,11 +241,23 @@
 // DecRef implements vfs.DentryImpl.DecRef.
 func (d *Dentry) DecRef(ctx context.Context) {
-	if refs := atomic.AddInt64(&d.refs, -1); refs == 0 {
+	r := atomic.AddInt64(&d.refs, -1)
+	refsvfs2.LogDecRef(d, r)
+	if r == 0 {
 		d.fs.mu.Lock()
 		d.cacheLocked(ctx)
 		d.fs.mu.Unlock()
-	} else if refs < 0 {
+	} else if r < 0 {
+		panic("kernfs.Dentry.DecRef() called without holding a reference")
+	}
+}
+
+func (d *Dentry) decRefLocked(ctx context.Context) {
+	r := atomic.AddInt64(&d.refs, -1)
+	refsvfs2.LogDecRef(d, r)
+	if r == 0 {
+		d.cacheLocked(ctx)
+	} else if r < 0 {
 		panic("kernfs.Dentry.DecRef() called without holding a reference")
 	}
 }
 
@@ -298,11 +319,20 @@ func (d *Dentry) cacheLocked(ctx context.Context) {
 	if d.fs.cachedDentriesLen <= d.fs.MaxCachedDentries {
 		return
 	}
+	d.fs.evictCachedDentryLocked(ctx)
+	// Whether or not victim was destroyed, we brought fs.cachedDentriesLen
+	// back down to fs.opts.maxCachedDentries, so we don't loop.
+}
+
+// Preconditions:
+// * fs.mu must be locked for writing.
+// * fs.cachedDentriesLen != 0.
+func (fs *Filesystem) evictCachedDentryLocked(ctx context.Context) {
 	// Evict the least recently used dentry because cache size is greater than
 	// max cache size (configured on mount).
-	victim := d.fs.cachedDentries.Back()
-	d.fs.cachedDentries.Remove(victim)
-	d.fs.cachedDentriesLen--
+	victim := fs.cachedDentries.Back()
+	fs.cachedDentries.Remove(victim)
+	fs.cachedDentriesLen--
 	victim.cached = false
 	// victim.refs may have become non-zero from an earlier path resolution
 	// after it was inserted into fs.cachedDentries.
@@ -311,7 +341,7 @@ func (d *Dentry) cacheLocked(ctx context.Context) {
 		victim.parent.dirMu.Lock()
 		// Note that victim can't be a mount point (in any mount
 		// namespace), since VFS holds references on mount points.
-		d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, victim.VFSDentry())
+		fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, victim.VFSDentry())
 		delete(victim.parent.children, victim.name)
 		victim.parent.dirMu.Unlock()
 	}
@@ -330,7 +360,8 @@ func (d *Dentry) cacheLocked(ctx context.Context) {
 // by path traversal.
 // * d.vfsd.IsDead() is true.
 func (d *Dentry) destroyLocked(ctx context.Context) {
-	switch atomic.LoadInt64(&d.refs) {
+	refs := atomic.LoadInt64(&d.refs)
+	switch refs {
 	case 0:
 		// Mark the dentry destroyed.
 		atomic.StoreInt64(&d.refs, -1)
@@ -343,15 +374,42 @@ func (d *Dentry) destroyLocked(ctx context.Context) {
 	d.inode.DecRef(ctx) // IncRef from Init.
 	d.inode = nil
 
-	// Drop the reference held by d on its parent without recursively locking
-	// d.fs.mu.
 	if d.parent != nil {
-		if refs := atomic.AddInt64(&d.parent.refs, -1); refs == 0 {
-			d.parent.cacheLocked(ctx)
-		} else if refs < 0 {
-			panic("kernfs.Dentry.DecRef() called without holding a reference")
-		}
+		d.parent.decRefLocked(ctx)
 	}
+
+	refsvfs2.Unregister(d)
+}
+
+// RefType implements refsvfs2.CheckedObject.RefType.
+func (d *Dentry) RefType() string {
+	return "kernfs.Dentry"
+}
+
+// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
+func (d *Dentry) LeakMessage() string {
+	return fmt.Sprintf("[kernfs.Dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs))
+}
+
+// LogRefs implements refsvfs2.CheckedObject.LogRefs.
+//
+// This should only be set to true for debugging purposes, as it can generate an
+// extremely large amount of output and drastically degrade performance.
+func (d *Dentry) LogRefs() bool {
+	return false
+}
+
+// InitRoot initializes this dentry as the root of the filesystem.
+//
+// Precondition: Caller must hold a reference on inode.
+//
+// Postcondition: Caller's reference on inode is transferred to the dentry.
+func (d *Dentry) InitRoot(fs *Filesystem, inode Inode) {
+	d.Init(fs, inode)
+	fs.root = d
+	// Hold an extra reference on the root dentry. It is held by fs to prevent the
+	// root from being "cached" and subsequently evicted.
+	d.IncRef()
 }
 
 // Init initializes this dentry.
@@ -371,6 +429,7 @@ func (d *Dentry) Init(fs *Filesystem, inode Inode) {
 	if ftype == linux.ModeSymlink {
 		d.flags |= dflagsIsSymlink
 	}
+	refsvfs2.Register(d)
 }
 
 // VFSDentry returns the generic vfs dentry for this kernfs dentry.
diff --git a/pkg/sentry/fsimpl/kernfs/save_restore.go b/pkg/sentry/fsimpl/kernfs/save_restore.go
index 1f48de6f1..f78509eb7 100644
--- a/pkg/sentry/fsimpl/kernfs/save_restore.go
+++ b/pkg/sentry/fsimpl/kernfs/save_restore.go
@@ -14,6 +14,19 @@
 
 package kernfs
 
+import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/refsvfs2"
+)
+
+// afterLoad is invoked by stateify.
+func (d *Dentry) afterLoad() {
+	if atomic.LoadInt64(&d.refs) >= 0 {
+		refsvfs2.Register(d)
+	}
+}
+
 // afterLoad is invoked by stateify.
 func (i *inodePlatformFile) afterLoad() {
 	if i.fileMapper.IsInited() {
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 99abcab66..8716d0a3c 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -94,7 +94,7 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
 
 	inode := procfs.newTasksInode(ctx, k, pidns, cgroups)
 	var dentry kernfs.Dentry
-	dentry.Init(&procfs.Filesystem, inode)
+	dentry.InitRoot(&procfs.Filesystem, inode)
 
 	return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
 }
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 7d2147141..506a2a0f0 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -102,7 +102,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		"power": fs.newDir(ctx, creds, defaultSysDirMode, nil),
 	})
 	var rootD kernfs.Dentry
-	rootD.Init(&fs.Filesystem, root)
+	rootD.InitRoot(&fs.Filesystem, root)
 	return fs.VFSFilesystem(), rootD.VFSDentry(), nil
 }
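
Note on the pattern: the bookkeeping this patch applies to kernfs.Dentry is (1) register the object with the leak checker when it is initialized, (2) log every reference-count transition, and (3) unregister the object once its count reaches the destroyed state (-1). The Go program below is a minimal standalone sketch of that pattern, not gVisor code: object, register, unregister, and the Printf calls are hypothetical stand-ins for kernfs.Dentry, refsvfs2.Register, refsvfs2.Unregister, and refsvfs2.LogIncRef/LogDecRef.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// liveObjects is a stand-in for the refsvfs2 live-object map: objects are
// added on register, removed on unregister, and anything still present at
// shutdown would be reported as a leak.
var (
	liveMu      sync.Mutex
	liveObjects = map[*object]struct{}{}
)

func register(o *object)   { liveMu.Lock(); liveObjects[o] = struct{}{}; liveMu.Unlock() }
func unregister(o *object) { liveMu.Lock(); delete(liveObjects, o); liveMu.Unlock() }

// object follows the kernfs.Dentry refcount convention: refs >= 1 is live,
// refs == 0 is releasable, refs == -1 is destroyed.
type object struct {
	refs int64
}

func newObject() *object {
	o := &object{refs: 1}
	register(o) // analogous to refsvfs2.Register(d) in Dentry.Init
	return o
}

func (o *object) IncRef() {
	r := atomic.AddInt64(&o.refs, 1)
	fmt.Printf("IncRef %p -> %d\n", o, r) // analogous to refsvfs2.LogIncRef
}

func (o *object) DecRef() {
	r := atomic.AddInt64(&o.refs, -1)
	fmt.Printf("DecRef %p -> %d\n", o, r) // analogous to refsvfs2.LogDecRef
	switch {
	case r == 0:
		// Last reference: mark the object destroyed and drop it from the
		// live-object map, mirroring destroyLocked -> refsvfs2.Unregister.
		atomic.StoreInt64(&o.refs, -1)
		unregister(o)
	case r < 0:
		panic("DecRef called without holding a reference")
	}
}

func main() {
	o := newObject() // registered with one reference
	o.IncRef()
	o.DecRef()
	o.DecRef() // final reference: object is destroyed and unregistered

	liveMu.Lock()
	fmt.Printf("objects still live at shutdown: %d\n", len(liveObjects))
	liveMu.Unlock()
}

In the kernfs change itself, the extra reference taken in Dentry.InitRoot is what keeps the root dentry out of the eviction path, and Filesystem.Release drops that reference along with Keep()-ed lookup references and cached dentries, so every registered kernfs.Dentry reaches the destroyed state before the leak checker reports.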