From b72e1b3c0873ea29d031db42e39ca053923eecff Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Mon, 25 Nov 2019 18:09:15 -0800 Subject: [PATCH] Minor VFS2 interface changes. - Remove the Filesystem argument from DentryImpl.*Ref(); in general DentryImpls that need the Filesystem for reference counting will probably also need it for other interface methods that don't plumb Filesystem, so it's easier to just store a pointer to the filesystem in the DentryImpl. - Add a pointer to the VirtualFilesystem to Filesystem, which is needed by the gofer client to disown dentries for cache eviction triggered by dentry reference count changes. - Rename FilesystemType.NewFilesystem to GetFilesystem; in some cases (e.g. sysfs, cgroupfs) it's much cleaner for there to be only one Filesystem that is used by all mounts, and in at least one case (devtmpfs) it's visibly incorrect not to do so, so NewFilesystem doesn't always actually create and return a *new* Filesystem. - Require callers of FileDescription.Init() to increment Mount/Dentry references. This is because the gofer client may, in the OpenAt() path, take a reference on a dentry with 0 references, which is safe due to synchronization that is outside the scope of this CL, and it would be safer to still have its implementation of DentryImpl.IncRef() check for an increment for 0 references in other cases. - Add FileDescription.TryIncRef. This is used by the gofer client to take references on "special file descriptions" (FDs for files such as pipes, sockets, and devices), which use per-FD handles (fids) instead of dentry-shared handles, for sync() and syncfs(). PiperOrigin-RevId: 282473364 --- .../fsimpl/ext/benchmark/benchmark_test.go | 4 +- pkg/sentry/fsimpl/ext/block_map_file.go | 4 +- pkg/sentry/fsimpl/ext/block_map_test.go | 4 +- pkg/sentry/fsimpl/ext/dentry.go | 10 ++-- pkg/sentry/fsimpl/ext/ext.go | 10 ++-- pkg/sentry/fsimpl/ext/ext_test.go | 14 ++--- pkg/sentry/fsimpl/ext/extent_file.go | 6 +- pkg/sentry/fsimpl/ext/extent_test.go | 4 +- pkg/sentry/fsimpl/ext/file_description.go | 4 +- pkg/sentry/fsimpl/ext/inode.go | 28 ++++++---- pkg/sentry/fsimpl/memfs/benchmark_test.go | 6 +- pkg/sentry/fsimpl/memfs/filesystem.go | 15 +++-- pkg/sentry/fsimpl/memfs/memfs.go | 16 +++--- pkg/sentry/fsimpl/memfs/named_pipe.go | 5 +- pkg/sentry/fsimpl/memfs/pipe_test.go | 2 +- pkg/sentry/vfs/dentry.go | 24 ++++---- pkg/sentry/vfs/file_description.go | 33 ++++++++++- .../vfs/file_description_impl_util_test.go | 2 +- pkg/sentry/vfs/filesystem.go | 20 +++++-- pkg/sentry/vfs/filesystem_type.go | 10 ++-- pkg/sentry/vfs/mount.go | 56 ++++++++++--------- pkg/sentry/vfs/resolving_path.go | 8 +-- pkg/sentry/vfs/syscalls.go | 2 +- pkg/sentry/vfs/testutil.go | 12 ++-- pkg/sentry/vfs/vfs.go | 8 +-- 25 files changed, 186 insertions(+), 121 deletions(-) diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go index 10a8083a0..94cd74095 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go @@ -50,7 +50,7 @@ func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesys // Create VFS. vfsObj := vfs.New() vfsObj.MustRegisterFilesystemType("extfs", ext.FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.NewFilesystemOptions{InternalData: int(f.Fd())}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.GetFilesystemOptions{InternalData: int(f.Fd())}) if err != nil { f.Close() return nil, nil, nil, nil, err @@ -81,7 +81,7 @@ func mount(b *testing.B, imagePath string, vfsfs *vfs.VirtualFilesystem, pop *vf ctx := contexttest.Context(b) creds := auth.CredentialsFromContext(ctx) - if err := vfsfs.NewMount(ctx, creds, imagePath, pop, "extfs", &vfs.NewFilesystemOptions{InternalData: int(f.Fd())}); err != nil { + if err := vfsfs.NewMount(ctx, creds, imagePath, pop, "extfs", &vfs.GetFilesystemOptions{InternalData: int(f.Fd())}); err != nil { b.Fatalf("failed to mount tmpfs submount: %v", err) } return func() { diff --git a/pkg/sentry/fsimpl/ext/block_map_file.go b/pkg/sentry/fsimpl/ext/block_map_file.go index cea89bcd9..a2d8c3ad6 100644 --- a/pkg/sentry/fsimpl/ext/block_map_file.go +++ b/pkg/sentry/fsimpl/ext/block_map_file.go @@ -154,7 +154,7 @@ func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, ds toRead = len(dst) } - n, _ := f.regFile.inode.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff)) + n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff)) if n < toRead { return n, syserror.EIO } @@ -174,7 +174,7 @@ func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, ds curChildOff := relFileOff % childCov for i := startIdx; i < endIdx; i++ { var childPhyBlk uint32 - err := readFromDisk(f.regFile.inode.dev, curPhyBlkOff+int64(i*4), &childPhyBlk) + err := readFromDisk(f.regFile.inode.fs.dev, curPhyBlkOff+int64(i*4), &childPhyBlk) if err != nil { return read, err } diff --git a/pkg/sentry/fsimpl/ext/block_map_test.go b/pkg/sentry/fsimpl/ext/block_map_test.go index 213aa3919..181727ef7 100644 --- a/pkg/sentry/fsimpl/ext/block_map_test.go +++ b/pkg/sentry/fsimpl/ext/block_map_test.go @@ -87,12 +87,14 @@ func blockMapSetUp(t *testing.T) (*blockMapFile, []byte) { mockDisk := make([]byte, mockBMDiskSize) regFile := regularFile{ inode: inode{ + fs: &filesystem{ + dev: bytes.NewReader(mockDisk), + }, diskInode: &disklayout.InodeNew{ InodeOld: disklayout.InodeOld{ SizeLo: getMockBMFileFize(), }, }, - dev: bytes.NewReader(mockDisk), blkSize: uint64(mockBMBlkSize), }, } diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index 054fb42b6..a080cb189 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -41,16 +41,18 @@ func newDentry(in *inode) *dentry { } // IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef(vfsfs *vfs.Filesystem) { +func (d *dentry) IncRef() { d.inode.incRef() } // TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { +func (d *dentry) TryIncRef() bool { return d.inode.tryIncRef() } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(vfsfs *vfs.Filesystem) { - d.inode.decRef(vfsfs.Impl().(*filesystem)) +func (d *dentry) DecRef() { + // FIXME(b/134676337): filesystem.mu may not be locked as required by + // inode.decRef(). + d.inode.decRef() } diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go index f10accafc..4b7d17dc6 100644 --- a/pkg/sentry/fsimpl/ext/ext.go +++ b/pkg/sentry/fsimpl/ext/ext.go @@ -40,14 +40,14 @@ var _ vfs.FilesystemType = (*FilesystemType)(nil) // Currently there are two ways of mounting an ext(2/3/4) fs: // 1. Specify a mount with our internal special MountType in the OCI spec. // 2. Expose the device to the container and mount it from application layer. -func getDeviceFd(source string, opts vfs.NewFilesystemOptions) (io.ReaderAt, error) { +func getDeviceFd(source string, opts vfs.GetFilesystemOptions) (io.ReaderAt, error) { if opts.InternalData == nil { // User mount call. // TODO(b/134676337): Open the device specified by `source` and return that. panic("unimplemented") } - // NewFilesystem call originated from within the sentry. + // GetFilesystem call originated from within the sentry. devFd, ok := opts.InternalData.(int) if !ok { return nil, errors.New("internal data for ext fs must be an int containing the file descriptor to device") @@ -91,8 +91,8 @@ func isCompatible(sb disklayout.SuperBlock) bool { return true } -// NewFilesystem implements vfs.FilesystemType.NewFilesystem. -func (FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { // TODO(b/134676337): Ensure that the user is mounting readonly. If not, // EACCESS should be returned according to mount(2). Filesystem independent // flags (like readonly) are currently not available in pkg/sentry/vfs. @@ -103,7 +103,7 @@ func (FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials } fs := filesystem{dev: dev, inodeCache: make(map[uint32]*inode)} - fs.vfsfs.Init(&fs) + fs.vfsfs.Init(vfsObj, &fs) fs.sb, err = readSuperBlock(dev) if err != nil { return nil, nil, err diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index 1aa2bd6a4..307e4d68c 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -66,7 +66,7 @@ func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesys // Create VFS. vfsObj := vfs.New() vfsObj.MustRegisterFilesystemType("extfs", FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.NewFilesystemOptions{InternalData: int(f.Fd())}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.GetFilesystemOptions{InternalData: int(f.Fd())}) if err != nil { f.Close() return nil, nil, nil, nil, err @@ -509,27 +509,27 @@ func TestIterDirents(t *testing.T) { } wantDirents := []vfs.Dirent{ - vfs.Dirent{ + { Name: ".", Type: linux.DT_DIR, }, - vfs.Dirent{ + { Name: "..", Type: linux.DT_DIR, }, - vfs.Dirent{ + { Name: "lost+found", Type: linux.DT_DIR, }, - vfs.Dirent{ + { Name: "file.txt", Type: linux.DT_REG, }, - vfs.Dirent{ + { Name: "bigfile.txt", Type: linux.DT_REG, }, - vfs.Dirent{ + { Name: "symlink.txt", Type: linux.DT_LNK, }, diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go index 38b68a2d3..3d3ebaca6 100644 --- a/pkg/sentry/fsimpl/ext/extent_file.go +++ b/pkg/sentry/fsimpl/ext/extent_file.go @@ -99,7 +99,7 @@ func (f *extentFile) buildExtTree() error { func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) { var header disklayout.ExtentHeader off := entry.PhysicalBlock() * f.regFile.inode.blkSize - err := readFromDisk(f.regFile.inode.dev, int64(off), &header) + err := readFromDisk(f.regFile.inode.fs.dev, int64(off), &header) if err != nil { return nil, err } @@ -115,7 +115,7 @@ func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*diskla curEntry = &disklayout.ExtentIdx{} } - err := readFromDisk(f.regFile.inode.dev, int64(off), curEntry) + err := readFromDisk(f.regFile.inode.fs.dev, int64(off), curEntry) if err != nil { return nil, err } @@ -229,7 +229,7 @@ func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byt toRead = len(dst) } - n, _ := f.regFile.inode.dev.ReadAt(dst[:toRead], int64(readStart)) + n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], int64(readStart)) if n < toRead { return n, syserror.EIO } diff --git a/pkg/sentry/fsimpl/ext/extent_test.go b/pkg/sentry/fsimpl/ext/extent_test.go index 42d0a484b..a2382daa3 100644 --- a/pkg/sentry/fsimpl/ext/extent_test.go +++ b/pkg/sentry/fsimpl/ext/extent_test.go @@ -180,13 +180,15 @@ func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (*extentFile, [] mockExtentFile := &extentFile{ regFile: regularFile{ inode: inode{ + fs: &filesystem{ + dev: bytes.NewReader(mockDisk), + }, diskInode: &disklayout.InodeNew{ InodeOld: disklayout.InodeOld{ SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root), }, }, blkSize: mockExtentBlkSize, - dev: bytes.NewReader(mockDisk), }, }, } diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go index 4d18b28cb..5eca2b83f 100644 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ b/pkg/sentry/fsimpl/ext/file_description.go @@ -36,11 +36,11 @@ type fileDescription struct { } func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) + return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) } func (fd *fileDescription) inode() *inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode + return fd.vfsfd.Dentry().Impl().(*dentry).inode } // StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index e6c847a71..24249525c 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -16,7 +16,6 @@ package ext import ( "fmt" - "io" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -42,13 +41,13 @@ type inode struct { // refs is a reference count. refs is accessed using atomic memory operations. refs int64 + // fs is the containing filesystem. + fs *filesystem + // inodeNum is the inode number of this inode on disk. This is used to // identify inodes within the ext filesystem. inodeNum uint32 - // dev represents the underlying device. Same as filesystem.dev. - dev io.ReaderAt - // blkSize is the fs data block size. Same as filesystem.sb.BlockSize(). blkSize uint64 @@ -81,10 +80,10 @@ func (in *inode) tryIncRef() bool { // decRef decrements the inode ref count and releases the inode resources if // the ref count hits 0. // -// Precondition: Must have locked fs.mu. -func (in *inode) decRef(fs *filesystem) { +// Precondition: Must have locked filesystem.mu. +func (in *inode) decRef() { if refs := atomic.AddInt64(&in.refs, -1); refs == 0 { - delete(fs.inodeCache, in.inodeNum) + delete(in.fs.inodeCache, in.inodeNum) } else if refs < 0 { panic("ext.inode.decRef() called without holding a reference") } @@ -117,8 +116,8 @@ func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { // Build the inode based on its type. inode := inode{ + fs: fs, inodeNum: inodeNum, - dev: fs.dev, blkSize: blkSize, diskInode: diskInode, } @@ -154,11 +153,14 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v if err := in.checkPermissions(rp.Credentials(), ats); err != nil { return nil, err } + mnt := rp.Mount() switch in.impl.(type) { case *regularFile: var fd regularFileFD fd.flags = flags - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) return &fd.vfsfd, nil case *directory: // Can't open directories writably. This check is not necessary for a read @@ -167,8 +169,10 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v return nil, syserror.EISDIR } var fd directoryFD - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) fd.flags = flags + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) return &fd.vfsfd, nil case *symlink: if flags&linux.O_PATH == 0 { @@ -177,7 +181,9 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v } var fd symlinkFD fd.flags = flags - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) return &fd.vfsfd, nil default: panic(fmt.Sprintf("unknown inode type: %T", in.impl)) diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go index 23a846c08..ea6417ce7 100644 --- a/pkg/sentry/fsimpl/memfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/memfs/benchmark_test.go @@ -176,7 +176,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { // Create VFS. vfsObj := vfs.New() vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } @@ -365,7 +365,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { // Create VFS. vfsObj := vfs.New() vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } @@ -394,7 +394,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { } defer mountPoint.DecRef() // Create and mount the submount. - if err := vfsObj.NewMount(ctx, creds, "", &pop, "memfs", &vfs.NewFilesystemOptions{}); err != nil { + if err := vfsObj.NewMount(ctx, creds, "", &pop, "memfs", &vfs.GetFilesystemOptions{}); err != nil { b.Fatalf("failed to mount tmpfs submount: %v", err) } filePathBuilder.WriteString(mountPointName) diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go index f006c40cd..08a9cb8ef 100644 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ b/pkg/sentry/fsimpl/memfs/filesystem.go @@ -159,7 +159,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op return nil, err } } - inode.incRef() // vfsd.IncRef(&fs.vfsfs) + inode.incRef() return vfsd, nil } @@ -379,6 +379,7 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr return nil, err } } + mnt := rp.Mount() switch impl := i.impl.(type) { case *regularFile: var fd regularFileFD @@ -386,12 +387,14 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr fd.readable = vfs.MayReadFileWithOpenFlags(flags) fd.writable = vfs.MayWriteFileWithOpenFlags(flags) if fd.writable { - if err := rp.Mount().CheckBeginWrite(); err != nil { + if err := mnt.CheckBeginWrite(); err != nil { return nil, err } - // Mount.EndWrite() is called by regularFileFD.Release(). + // mnt.EndWrite() is called by regularFileFD.Release(). } - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) if flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data = impl.data[:0] @@ -405,7 +408,9 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr return nil, syserror.EISDIR } var fd directoryFD - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) fd.flags = flags return &fd.vfsfd, nil case *symlink: diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go index 64c851c1a..4cb2a4e0f 100644 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ b/pkg/sentry/fsimpl/memfs/memfs.go @@ -52,10 +52,10 @@ type filesystem struct { nextInoMinusOne uint64 // accessed using atomic memory operations } -// NewFilesystem implements vfs.FilesystemType.NewFilesystem. -func (fstype FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { var fs filesystem - fs.vfsfs.Init(&fs) + fs.vfsfs.Init(vfsObj, &fs) root := fs.newDentry(fs.newDirectory(creds, 01777)) return &fs.vfsfs, &root.vfsd, nil } @@ -99,17 +99,17 @@ func (fs *filesystem) newDentry(inode *inode) *dentry { } // IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef(vfsfs *vfs.Filesystem) { +func (d *dentry) IncRef() { d.inode.incRef() } // TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { +func (d *dentry) TryIncRef() bool { return d.inode.tryIncRef() } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(vfsfs *vfs.Filesystem) { +func (d *dentry) DecRef() { d.inode.decRef() } @@ -266,11 +266,11 @@ type fileDescription struct { } func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) + return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) } func (fd *fileDescription) inode() *inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode + return fd.vfsfd.Dentry().Impl().(*dentry).inode } // StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. diff --git a/pkg/sentry/fsimpl/memfs/named_pipe.go b/pkg/sentry/fsimpl/memfs/named_pipe.go index 732ed7c58..91cb4b1fc 100644 --- a/pkg/sentry/fsimpl/memfs/named_pipe.go +++ b/pkg/sentry/fsimpl/memfs/named_pipe.go @@ -54,6 +54,9 @@ func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, v if err != nil { return nil, err } - fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + mnt := rp.Mount() + mnt.IncRef() + vfsd.IncRef() + fd.vfsfd.Init(&fd, mnt, vfsd) return &fd.vfsfd, nil } diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go index 0674b81a3..a3a870571 100644 --- a/pkg/sentry/fsimpl/memfs/pipe_test.go +++ b/pkg/sentry/fsimpl/memfs/pipe_test.go @@ -152,7 +152,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy // Create VFS. vfsObj := vfs.New() vfsObj.MustRegisterFilesystemType("memfs", FilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) if err != nil { t.Fatalf("failed to create tmpfs root mount: %v", err) } diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index 09ed5a70e..40f4c1d09 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -118,7 +118,7 @@ func (d *Dentry) Impl() DentryImpl { type DentryImpl interface { // IncRef increments the Dentry's reference count. A Dentry with a non-zero // reference count must remain coherent with the state of the filesystem. - IncRef(fs *Filesystem) + IncRef() // TryIncRef increments the Dentry's reference count and returns true. If // the Dentry's reference count is zero, TryIncRef may do nothing and @@ -126,10 +126,10 @@ type DentryImpl interface { // guarantee that the Dentry is coherent with the state of the filesystem.) // // TryIncRef does not require that a reference is held on the Dentry. - TryIncRef(fs *Filesystem) bool + TryIncRef() bool // DecRef decrements the Dentry's reference count. - DecRef(fs *Filesystem) + DecRef() } // IsDisowned returns true if d is disowned. @@ -146,16 +146,20 @@ func (d *Dentry) isMounted() bool { return atomic.LoadUint32(&d.mounts) != 0 } -func (d *Dentry) incRef(fs *Filesystem) { - d.impl.IncRef(fs) +// IncRef increments d's reference count. +func (d *Dentry) IncRef() { + d.impl.IncRef() } -func (d *Dentry) tryIncRef(fs *Filesystem) bool { - return d.impl.TryIncRef(fs) +// TryIncRef increments d's reference count and returns true. If d's reference +// count is zero, TryIncRef may instead do nothing and return false. +func (d *Dentry) TryIncRef() bool { + return d.impl.TryIncRef() } -func (d *Dentry) decRef(fs *Filesystem) { - d.impl.DecRef(fs) +// DecRef decrements d's reference count. +func (d *Dentry) DecRef() { + d.impl.DecRef() } // These functions are exported so that filesystem implementations can use @@ -420,6 +424,6 @@ func (vfs *VirtualFilesystem) forgetDisownedMountpoint(d *Dentry) { vd.DecRef() } for _, mnt := range mountsToDecRef { - mnt.decRef() + mnt.DecRef() } } diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 3a9665800..34007eb57 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -47,15 +47,14 @@ type FileDescription struct { impl FileDescriptionImpl } -// Init must be called before first use of fd. It takes references on mnt and -// d. +// Init must be called before first use of fd. It takes ownership of references +// on mnt and d held by the caller. func (fd *FileDescription) Init(impl FileDescriptionImpl, mnt *Mount, d *Dentry) { fd.refs = 1 fd.vd = VirtualDentry{ mount: mnt, dentry: d, } - fd.vd.IncRef() fd.impl = impl } @@ -64,6 +63,18 @@ func (fd *FileDescription) Impl() FileDescriptionImpl { return fd.impl } +// Mount returns the mount on which fd was opened. It does not take a reference +// on the returned Mount. +func (fd *FileDescription) Mount() *Mount { + return fd.vd.mount +} + +// Dentry returns the dentry at which fd was opened. It does not take a +// reference on the returned Dentry. +func (fd *FileDescription) Dentry() *Dentry { + return fd.vd.dentry +} + // VirtualDentry returns the location at which fd was opened. It does not take // a reference on the returned VirtualDentry. func (fd *FileDescription) VirtualDentry() VirtualDentry { @@ -75,6 +86,22 @@ func (fd *FileDescription) IncRef() { atomic.AddInt64(&fd.refs, 1) } +// TryIncRef increments fd's reference count and returns true. If fd's +// reference count is already zero, TryIncRef does nothing and returns false. +// +// TryIncRef does not require that a reference is held on fd. +func (fd *FileDescription) TryIncRef() bool { + for { + refs := atomic.LoadInt64(&fd.refs) + if refs <= 0 { + return false + } + if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) { + return true + } + } +} + // DecRef decrements fd's reference count. func (fd *FileDescription) DecRef() { if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 511b829fc..a5561dcbe 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -90,7 +90,7 @@ func TestGenCountFD(t *testing.T) { vfsObj := New() // vfs.New() vfsObj.MustRegisterFilesystemType("testfs", FDTestFilesystemType{}) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "testfs", &NewFilesystemOptions{}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "testfs", &GetFilesystemOptions{}) if err != nil { t.Fatalf("failed to create testfs root mount: %v", err) } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 7a074b718..76ff8cf51 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -33,29 +33,41 @@ type Filesystem struct { // operations. refs int64 + // vfs is the VirtualFilesystem that uses this Filesystem. vfs is + // immutable. + vfs *VirtualFilesystem + // impl is the FilesystemImpl associated with this Filesystem. impl is // immutable. This should be the last field in Dentry. impl FilesystemImpl } // Init must be called before first use of fs. -func (fs *Filesystem) Init(impl FilesystemImpl) { +func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, impl FilesystemImpl) { fs.refs = 1 + fs.vfs = vfsObj fs.impl = impl } +// VirtualFilesystem returns the containing VirtualFilesystem. +func (fs *Filesystem) VirtualFilesystem() *VirtualFilesystem { + return fs.vfs +} + // Impl returns the FilesystemImpl associated with fs. func (fs *Filesystem) Impl() FilesystemImpl { return fs.impl } -func (fs *Filesystem) incRef() { +// IncRef increments fs' reference count. +func (fs *Filesystem) IncRef() { if atomic.AddInt64(&fs.refs, 1) <= 1 { - panic("Filesystem.incRef() called without holding a reference") + panic("Filesystem.IncRef() called without holding a reference") } } -func (fs *Filesystem) decRef() { +// DecRef decrements fs' reference count. +func (fs *Filesystem) DecRef() { if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { fs.impl.Release() } else if refs < 0 { diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go index f401ad7f3..c335e206d 100644 --- a/pkg/sentry/vfs/filesystem_type.go +++ b/pkg/sentry/vfs/filesystem_type.go @@ -25,21 +25,21 @@ import ( // // FilesystemType is analogous to Linux's struct file_system_type. type FilesystemType interface { - // NewFilesystem returns a Filesystem configured by the given options, + // GetFilesystem returns a Filesystem configured by the given options, // along with its mount root. A reference is taken on the returned // Filesystem and Dentry. - NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts NewFilesystemOptions) (*Filesystem, *Dentry, error) + GetFilesystem(ctx context.Context, vfsObj *VirtualFilesystem, creds *auth.Credentials, source string, opts GetFilesystemOptions) (*Filesystem, *Dentry, error) } -// NewFilesystemOptions contains options to FilesystemType.NewFilesystem. -type NewFilesystemOptions struct { +// GetFilesystemOptions contains options to FilesystemType.GetFilesystem. +type GetFilesystemOptions struct { // Data is the string passed as the 5th argument to mount(2), which is // usually a comma-separated list of filesystem-specific mount options. Data string // InternalData holds opaque FilesystemType-specific data. There is // intentionally no way for applications to specify InternalData; if it is - // not nil, the call to NewFilesystem originates from within the sentry. + // not nil, the call to GetFilesystem originates from within the sentry. InternalData interface{} } diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 198fb8067..1c3b2e987 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -110,12 +110,12 @@ type MountNamespace struct { // NewMountNamespace returns a new mount namespace with a root filesystem // configured by the given arguments. A reference is taken on the returned // MountNamespace. -func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *NewFilesystemOptions) (*MountNamespace, error) { +func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *GetFilesystemOptions) (*MountNamespace, error) { fsType := vfs.getFilesystemType(fsTypeName) if fsType == nil { return nil, syserror.ENODEV } - fs, root, err := fsType.NewFilesystem(ctx, creds, source, *opts) + fs, root, err := fsType.GetFilesystem(ctx, vfs, creds, source, *opts) if err != nil { return nil, err } @@ -133,13 +133,13 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth return mntns, nil } -// NewMount creates and mounts a new Filesystem. -func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *NewFilesystemOptions) error { +// NewMount creates and mounts a Filesystem configured by the given arguments. +func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *GetFilesystemOptions) error { fsType := vfs.getFilesystemType(fsTypeName) if fsType == nil { return syserror.ENODEV } - fs, root, err := fsType.NewFilesystem(ctx, creds, source, *opts) + fs, root, err := fsType.GetFilesystem(ctx, vfs, creds, source, *opts) if err != nil { return err } @@ -147,8 +147,8 @@ func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credenti // lock ordering. vd, err := vfs.GetDentryAt(ctx, creds, target, &GetDentryOptions{}) if err != nil { - root.decRef(fs) - fs.decRef() + root.DecRef() + fs.DecRef() return err } vfs.mountMu.Lock() @@ -158,8 +158,8 @@ func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credenti vd.dentry.mu.Unlock() vfs.mountMu.Unlock() vd.DecRef() - root.decRef(fs) - fs.decRef() + root.DecRef() + fs.DecRef() return syserror.ENOENT } // vd might have been mounted over between vfs.GetDentryAt() and @@ -179,7 +179,7 @@ func (vfs *VirtualFilesystem) NewMount(ctx context.Context, creds *auth.Credenti break } // This can't fail since we're holding vfs.mountMu. - nextmnt.root.incRef(nextmnt.fs) + nextmnt.root.IncRef() vd.dentry.mu.Unlock() vd.DecRef() vd = VirtualDentry{ @@ -229,7 +229,7 @@ type umountRecursiveOptions struct { // Mounts on which references must be dropped to vdsToDecRef and mountsToDecRef // respectively, and returns updated slices. (This is necessary because // filesystem locks possibly taken by DentryImpl.DecRef() may precede -// vfs.mountMu in the lock order, and Mount.decRef() may lock vfs.mountMu.) +// vfs.mountMu in the lock order, and Mount.DecRef() may lock vfs.mountMu.) // // umountRecursiveLocked is analogous to Linux's fs/namespace.c:umount_tree(). // @@ -322,13 +322,15 @@ func (mnt *Mount) tryIncMountedRef() bool { } } -func (mnt *Mount) incRef() { +// IncRef increments mnt's reference count. +func (mnt *Mount) IncRef() { // In general, negative values for mnt.refs are valid because the MSB is // the eager-unmount bit. atomic.AddInt64(&mnt.refs, 1) } -func (mnt *Mount) decRef() { +// DecRef decrements mnt's reference count. +func (mnt *Mount) DecRef() { refs := atomic.AddInt64(&mnt.refs, -1) if refs&^math.MinInt64 == 0 { // mask out MSB var vd VirtualDentry @@ -339,8 +341,8 @@ func (mnt *Mount) decRef() { mnt.vfs.mounts.seq.EndWrite() mnt.vfs.mountMu.Unlock() } - mnt.root.decRef(mnt.fs) - mnt.fs.decRef() + mnt.root.DecRef() + mnt.fs.DecRef() if vd.Ok() { vd.DecRef() } @@ -368,7 +370,7 @@ func (mntns *MountNamespace) DecRef(vfs *VirtualFilesystem) { vd.DecRef() } for _, mnt := range mountsToDecRef { - mnt.decRef() + mnt.DecRef() } } else if refs < 0 { panic("MountNamespace.DecRef() called without holding a reference") @@ -413,7 +415,7 @@ retryFirst: // Raced with umount. continue } - mnt.decRef() + mnt.DecRef() mnt = next d = next.root } @@ -447,15 +449,15 @@ retryFirst: // Raced with umount. goto retryFirst } - if !point.tryIncRef(parent.fs) { + if !point.TryIncRef() { // Since Mount holds a reference on Mount.key.point, this can only // happen due to a racing change to Mount.key. - parent.decRef() + parent.DecRef() goto retryFirst } if !vfs.mounts.seq.ReadOk(epoch) { - point.decRef(parent.fs) - parent.decRef() + point.DecRef() + parent.DecRef() goto retryFirst } mnt = parent @@ -480,19 +482,19 @@ retryFirst: // Raced with umount. goto retryNotFirst } - if !point.tryIncRef(parent.fs) { + if !point.TryIncRef() { // Since Mount holds a reference on Mount.key.point, this can // only happen due to a racing change to Mount.key. - parent.decRef() + parent.DecRef() goto retryNotFirst } if !vfs.mounts.seq.ReadOk(epoch) { - point.decRef(parent.fs) - parent.decRef() + point.DecRef() + parent.DecRef() goto retryNotFirst } - d.decRef(mnt.fs) - mnt.decRef() + d.DecRef() + mnt.DecRef() mnt = parent d = point } diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index 61bce6426..621f5a6f8 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -149,20 +149,20 @@ func (vfs *VirtualFilesystem) putResolvingPath(rp *ResolvingPath) { func (rp *ResolvingPath) decRefStartAndMount() { if rp.flags&rpflagsHaveStartRef != 0 { - rp.start.decRef(rp.mount.fs) + rp.start.DecRef() } if rp.flags&rpflagsHaveMountRef != 0 { - rp.mount.decRef() + rp.mount.DecRef() } } func (rp *ResolvingPath) releaseErrorState() { if rp.nextStart != nil { - rp.nextStart.decRef(rp.nextMount.fs) + rp.nextStart.DecRef() rp.nextStart = nil } if rp.nextMount != nil { - rp.nextMount.decRef() + rp.nextMount.DecRef() rp.nextMount = nil } } diff --git a/pkg/sentry/vfs/syscalls.go b/pkg/sentry/vfs/syscalls.go index 49952b2cc..436151afa 100644 --- a/pkg/sentry/vfs/syscalls.go +++ b/pkg/sentry/vfs/syscalls.go @@ -63,7 +63,7 @@ func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Crede mount: rp.mount, dentry: d, } - rp.mount.incRef() + rp.mount.IncRef() vfs.putResolvingPath(rp) return vd, nil } diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go index 70b192ece..593144cb7 100644 --- a/pkg/sentry/vfs/testutil.go +++ b/pkg/sentry/vfs/testutil.go @@ -33,10 +33,10 @@ type FDTestFilesystem struct { vfsfs Filesystem } -// NewFilesystem implements FilesystemType.NewFilesystem. -func (fstype FDTestFilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts NewFilesystemOptions) (*Filesystem, *Dentry, error) { +// GetFilesystem implements FilesystemType.GetFilesystem. +func (fstype FDTestFilesystemType) GetFilesystem(ctx context.Context, vfsObj *VirtualFilesystem, creds *auth.Credentials, source string, opts GetFilesystemOptions) (*Filesystem, *Dentry, error) { var fs FDTestFilesystem - fs.vfsfs.Init(&fs) + fs.vfsfs.Init(vfsObj, &fs) return &fs.vfsfs, fs.NewDentry(), nil } @@ -126,14 +126,14 @@ func (fs *FDTestFilesystem) NewDentry() *Dentry { } // IncRef implements DentryImpl.IncRef. -func (d *fdTestDentry) IncRef(vfsfs *Filesystem) { +func (d *fdTestDentry) IncRef() { } // TryIncRef implements DentryImpl.TryIncRef. -func (d *fdTestDentry) TryIncRef(vfsfs *Filesystem) bool { +func (d *fdTestDentry) TryIncRef() bool { return true } // DecRef implements DentryImpl.DecRef. -func (d *fdTestDentry) DecRef(vfsfs *Filesystem) { +func (d *fdTestDentry) DecRef() { } diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 88e865d86..f0cd3ffe5 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -116,15 +116,15 @@ func (vd VirtualDentry) Ok() bool { // IncRef increments the reference counts on the Mount and Dentry represented // by vd. func (vd VirtualDentry) IncRef() { - vd.mount.incRef() - vd.dentry.incRef(vd.mount.fs) + vd.mount.IncRef() + vd.dentry.IncRef() } // DecRef decrements the reference counts on the Mount and Dentry represented // by vd. func (vd VirtualDentry) DecRef() { - vd.dentry.decRef(vd.mount.fs) - vd.mount.decRef() + vd.dentry.DecRef() + vd.mount.DecRef() } // Mount returns the Mount associated with vd. It does not take a reference on