549 lines
16 KiB
Go
549 lines
16 KiB
Go
// Copyright 2018 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package ramfs provides the fundamentals for a simple in-memory filesystem.
|
|
package ramfs
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
"syscall"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
"gvisor.dev/gvisor/pkg/sentry/fs"
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
)
|
|
|
|
// CreateOps represents operations to create different file types.
|
|
type CreateOps struct {
|
|
// NewDir creates a new directory.
|
|
NewDir func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error)
|
|
|
|
// NewFile creates a new file.
|
|
NewFile func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error)
|
|
|
|
// NewSymlink creates a new symlink with permissions 0777.
|
|
NewSymlink func(ctx context.Context, dir *fs.Inode, target string) (*fs.Inode, error)
|
|
|
|
// NewBoundEndpoint creates a new socket.
|
|
NewBoundEndpoint func(ctx context.Context, dir *fs.Inode, ep transport.BoundEndpoint, perms fs.FilePermissions) (*fs.Inode, error)
|
|
|
|
// NewFifo creates a new fifo.
|
|
NewFifo func(ctx context.Context, dir *fs.Inode, perm fs.FilePermissions) (*fs.Inode, error)
|
|
}
|
|
|
|
// Dir represents a single directory in the filesystem.
|
|
//
|
|
// +stateify savable
|
|
type Dir struct {
|
|
fsutil.InodeGenericChecker `state:"nosave"`
|
|
fsutil.InodeIsDirAllocate `state:"nosave"`
|
|
fsutil.InodeIsDirTruncate `state:"nosave"`
|
|
fsutil.InodeNoopWriteOut `state:"nosave"`
|
|
fsutil.InodeNotMappable `state:"nosave"`
|
|
fsutil.InodeNotSocket `state:"nosave"`
|
|
fsutil.InodeNotSymlink `state:"nosave"`
|
|
fsutil.InodeVirtual `state:"nosave"`
|
|
|
|
fsutil.InodeSimpleAttributes
|
|
fsutil.InodeSimpleExtendedAttributes
|
|
|
|
// CreateOps may be provided.
|
|
//
|
|
// These may only be modified during initialization (while the application
|
|
// is not running). No sychronization is performed when accessing these
|
|
// operations during syscalls.
|
|
*CreateOps `state:"nosave"`
|
|
|
|
// mu protects the fields below.
|
|
mu sync.Mutex `state:"nosave"`
|
|
|
|
// children are inodes that are in this directory. A reference is held
|
|
// on each inode while it is in the map.
|
|
children map[string]*fs.Inode
|
|
|
|
// dentryMap is a sortedDentryMap containing entries for all children.
|
|
// Its entries are kept up-to-date with d.children.
|
|
dentryMap *fs.SortedDentryMap
|
|
}
|
|
|
|
var _ fs.InodeOperations = (*Dir)(nil)
|
|
|
|
// NewDir returns a new Dir with the given contents and attributes. A reference
|
|
// on each fs.Inode in the `contents` map will be donated to this Dir.
|
|
func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions) *Dir {
|
|
d := &Dir{
|
|
InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, perms, linux.RAMFS_MAGIC),
|
|
}
|
|
|
|
if contents == nil {
|
|
contents = make(map[string]*fs.Inode)
|
|
}
|
|
d.children = contents
|
|
|
|
// Build the entries map ourselves, rather than calling addChildLocked,
|
|
// because it will be faster.
|
|
entries := make(map[string]fs.DentAttr, len(contents))
|
|
for name, inode := range contents {
|
|
entries[name] = fs.DentAttr{
|
|
Type: inode.StableAttr.Type,
|
|
InodeID: inode.StableAttr.InodeID,
|
|
}
|
|
}
|
|
d.dentryMap = fs.NewSortedDentryMap(entries)
|
|
|
|
// Directories have an extra link, corresponding to '.'.
|
|
d.AddLink()
|
|
|
|
return d
|
|
}
|
|
|
|
// addChildLocked add the child inode, inheriting its reference.
|
|
func (d *Dir) addChildLocked(ctx context.Context, name string, inode *fs.Inode) {
|
|
d.children[name] = inode
|
|
d.dentryMap.Add(name, fs.DentAttr{
|
|
Type: inode.StableAttr.Type,
|
|
InodeID: inode.StableAttr.InodeID,
|
|
})
|
|
|
|
// If the child is a directory, increment this dir's link count,
|
|
// corresponding to '..' from the subdirectory.
|
|
if fs.IsDir(inode.StableAttr) {
|
|
d.AddLink()
|
|
// ctime updated below.
|
|
}
|
|
|
|
// Given we're now adding this inode to the directory we must also
|
|
// increase its link count. Similarly we decrement it in removeChildLocked.
|
|
//
|
|
// Changing link count updates ctime.
|
|
inode.AddLink()
|
|
inode.InodeOperations.NotifyStatusChange(ctx)
|
|
|
|
// We've change the directory. This always updates our mtime and ctime.
|
|
d.NotifyModificationAndStatusChange(ctx)
|
|
}
|
|
|
|
// AddChild adds a child to this dir, inheriting its reference.
|
|
func (d *Dir) AddChild(ctx context.Context, name string, inode *fs.Inode) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
d.addChildLocked(ctx, name, inode)
|
|
}
|
|
|
|
// FindChild returns (child, true) if the directory contains name.
|
|
func (d *Dir) FindChild(name string) (*fs.Inode, bool) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
child, ok := d.children[name]
|
|
return child, ok
|
|
}
|
|
|
|
// Children returns the names and DentAttrs of all children. It can be used to
|
|
// implement Readdir for types that embed ramfs.Dir.
|
|
func (d *Dir) Children() ([]string, map[string]fs.DentAttr) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
// Return a copy to prevent callers from modifying our children.
|
|
names, entries := d.dentryMap.GetAll()
|
|
namesCopy := make([]string, len(names))
|
|
copy(namesCopy, names)
|
|
|
|
entriesCopy := make(map[string]fs.DentAttr)
|
|
for k, v := range entries {
|
|
entriesCopy[k] = v
|
|
}
|
|
|
|
return namesCopy, entriesCopy
|
|
}
|
|
|
|
// removeChildLocked attempts to remove an entry from this directory. It
|
|
// returns the removed fs.Inode along with its reference, which callers are
|
|
// responsible for decrementing.
|
|
func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) {
|
|
inode, ok := d.children[name]
|
|
if !ok {
|
|
return nil, syserror.EACCES
|
|
}
|
|
|
|
delete(d.children, name)
|
|
d.dentryMap.Remove(name)
|
|
d.NotifyModification(ctx)
|
|
|
|
// If the child was a subdirectory, then we must decrement this dir's
|
|
// link count which was the child's ".." directory entry.
|
|
if fs.IsDir(inode.StableAttr) {
|
|
d.DropLink()
|
|
// ctime changed below.
|
|
}
|
|
|
|
// Given we're now removing this inode to the directory we must also
|
|
// decrease its link count. Similarly it is increased in addChildLocked.
|
|
//
|
|
// Changing link count updates ctime.
|
|
inode.DropLink()
|
|
inode.InodeOperations.NotifyStatusChange(ctx)
|
|
|
|
// We've change the directory. This always updates our mtime and ctime.
|
|
d.NotifyModificationAndStatusChange(ctx)
|
|
|
|
return inode, nil
|
|
}
|
|
|
|
// Remove removes the named non-directory.
|
|
func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error {
|
|
if len(name) > linux.NAME_MAX {
|
|
return syserror.ENAMETOOLONG
|
|
}
|
|
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
inode, err := d.removeChildLocked(ctx, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Remove our reference on the inode.
|
|
inode.DecRef()
|
|
return nil
|
|
}
|
|
|
|
// RemoveDirectory removes the named directory.
|
|
func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) error {
|
|
if len(name) > linux.NAME_MAX {
|
|
return syserror.ENAMETOOLONG
|
|
}
|
|
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
// Get the child and make sure it is not empty.
|
|
childInode, err := d.walkLocked(ctx, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if ok, err := hasChildren(ctx, childInode); err != nil {
|
|
return err
|
|
} else if ok {
|
|
return syserror.ENOTEMPTY
|
|
}
|
|
|
|
// Child was empty. Proceed with removal.
|
|
inode, err := d.removeChildLocked(ctx, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Remove our reference on the inode.
|
|
inode.DecRef()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Lookup loads an inode at p into a Dirent. It returns the fs.Dirent along
|
|
// with a reference.
|
|
func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) {
|
|
if len(p) > linux.NAME_MAX {
|
|
return nil, syserror.ENAMETOOLONG
|
|
}
|
|
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
inode, err := d.walkLocked(ctx, p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Take a reference on the inode before returning it. This reference
|
|
// is owned by the dirent we are about to create.
|
|
inode.IncRef()
|
|
return fs.NewDirent(ctx, inode, p), nil
|
|
}
|
|
|
|
// walkLocked must be called with d.mu held.
|
|
func (d *Dir) walkLocked(ctx context.Context, p string) (*fs.Inode, error) {
|
|
// Lookup a child node.
|
|
if inode, ok := d.children[p]; ok {
|
|
return inode, nil
|
|
}
|
|
|
|
// fs.InodeOperations.Lookup returns syserror.ENOENT if p
|
|
// does not exist.
|
|
return nil, syserror.ENOENT
|
|
}
|
|
|
|
// createInodeOperationsCommon creates a new child node at this dir by calling
|
|
// makeInodeOperations. It is the common logic for creating a new child.
|
|
func (d *Dir) createInodeOperationsCommon(ctx context.Context, name string, makeInodeOperations func() (*fs.Inode, error)) (*fs.Inode, error) {
|
|
if len(name) > linux.NAME_MAX {
|
|
return nil, syserror.ENAMETOOLONG
|
|
}
|
|
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
inode, err := makeInodeOperations()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
d.addChildLocked(ctx, name, inode)
|
|
|
|
return inode, nil
|
|
}
|
|
|
|
// Create creates a new Inode with the given name and returns its File.
|
|
func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perms fs.FilePermissions) (*fs.File, error) {
|
|
if d.CreateOps == nil || d.CreateOps.NewFile == nil {
|
|
return nil, syserror.EACCES
|
|
}
|
|
|
|
inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
|
|
return d.NewFile(ctx, dir, perms)
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Take an extra ref on inode, which will be owned by the dirent.
|
|
inode.IncRef()
|
|
|
|
// Create the Dirent and corresponding file.
|
|
created := fs.NewDirent(ctx, inode, name)
|
|
defer created.DecRef()
|
|
return created.Inode.GetFile(ctx, created, flags)
|
|
}
|
|
|
|
// CreateLink returns a new link.
|
|
func (d *Dir) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname string) error {
|
|
if d.CreateOps == nil || d.CreateOps.NewSymlink == nil {
|
|
return syserror.EACCES
|
|
}
|
|
_, err := d.createInodeOperationsCommon(ctx, newname, func() (*fs.Inode, error) {
|
|
return d.NewSymlink(ctx, dir, oldname)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// CreateHardLink creates a new hard link.
|
|
func (d *Dir) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inode, name string) error {
|
|
if len(name) > linux.NAME_MAX {
|
|
return syserror.ENAMETOOLONG
|
|
}
|
|
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
// Take an extra reference on the inode and add it to our children.
|
|
target.IncRef()
|
|
|
|
// The link count will be incremented in addChildLocked.
|
|
d.addChildLocked(ctx, name, target)
|
|
|
|
return nil
|
|
}
|
|
|
|
// CreateDirectory returns a new subdirectory.
|
|
func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error {
|
|
if d.CreateOps == nil || d.CreateOps.NewDir == nil {
|
|
return syserror.EACCES
|
|
}
|
|
_, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
|
|
return d.NewDir(ctx, dir, perms)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Bind implements fs.InodeOperations.Bind.
|
|
func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) {
|
|
if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil {
|
|
return nil, syserror.EACCES
|
|
}
|
|
inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
|
|
return d.NewBoundEndpoint(ctx, dir, ep, perms)
|
|
})
|
|
if err == syscall.EEXIST {
|
|
return nil, syscall.EADDRINUSE
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Take another ref on inode which will be donated to the new dirent.
|
|
inode.IncRef()
|
|
return fs.NewDirent(ctx, inode, name), nil
|
|
}
|
|
|
|
// CreateFifo implements fs.InodeOperations.CreateFifo.
|
|
func (d *Dir) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error {
|
|
if d.CreateOps == nil || d.CreateOps.NewFifo == nil {
|
|
return syserror.EACCES
|
|
}
|
|
_, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
|
|
return d.NewFifo(ctx, dir, perms)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// GetFile implements fs.InodeOperations.GetFile.
|
|
func (d *Dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
|
|
flags.Pread = true
|
|
return fs.NewFile(ctx, dirent, flags, &dirFileOperations{dir: d}), nil
|
|
}
|
|
|
|
// Rename implements fs.InodeOperations.Rename.
|
|
func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
|
|
return Rename(ctx, oldParent.InodeOperations, oldName, newParent.InodeOperations, newName, replacement)
|
|
}
|
|
|
|
// Release implements fs.InodeOperation.Release.
|
|
func (d *Dir) Release(_ context.Context) {
|
|
// Drop references on all children.
|
|
d.mu.Lock()
|
|
for _, i := range d.children {
|
|
i.DecRef()
|
|
}
|
|
d.mu.Unlock()
|
|
}
|
|
|
|
// dirFileOperations implements fs.FileOperations for a ramfs directory.
|
|
//
|
|
// +stateify savable
|
|
type dirFileOperations struct {
|
|
fsutil.DirFileOperations `state:"nosave"`
|
|
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
|
|
|
|
// dirCursor contains the name of the last directory entry that was
|
|
// serialized.
|
|
dirCursor string
|
|
|
|
// dir is the ramfs dir that this file corresponds to.
|
|
dir *Dir
|
|
}
|
|
|
|
var _ fs.FileOperations = (*dirFileOperations)(nil)
|
|
|
|
// Seek implements fs.FileOperations.Seek.
|
|
func (dfo *dirFileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
|
|
return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &dfo.dirCursor)
|
|
}
|
|
|
|
// IterateDir implements DirIterator.IterateDir.
|
|
func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
|
|
dfo.dir.mu.Lock()
|
|
defer dfo.dir.mu.Unlock()
|
|
|
|
n, err := fs.GenericReaddir(dirCtx, dfo.dir.dentryMap)
|
|
return offset + n, err
|
|
}
|
|
|
|
// Readdir implements FileOperations.Readdir.
|
|
func (dfo *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
|
|
root := fs.RootFromContext(ctx)
|
|
if root != nil {
|
|
defer root.DecRef()
|
|
}
|
|
dirCtx := &fs.DirCtx{
|
|
Serializer: serializer,
|
|
DirCursor: &dfo.dirCursor,
|
|
}
|
|
dfo.dir.InodeSimpleAttributes.NotifyAccess(ctx)
|
|
return fs.DirentReaddir(ctx, file.Dirent, dfo, root, dirCtx, file.Offset())
|
|
}
|
|
|
|
// hasChildren is a helper method that determines whether an arbitrary inode
|
|
// (not necessarily ramfs) has any children.
|
|
func hasChildren(ctx context.Context, inode *fs.Inode) (bool, error) {
|
|
// Take an extra ref on inode which will be given to the dirent and
|
|
// dropped when that dirent is destroyed.
|
|
inode.IncRef()
|
|
d := fs.NewTransientDirent(inode)
|
|
defer d.DecRef()
|
|
|
|
file, err := inode.GetFile(ctx, d, fs.FileFlags{Read: true})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
defer file.DecRef()
|
|
|
|
ser := &fs.CollectEntriesSerializer{}
|
|
if err := file.Readdir(ctx, ser); err != nil {
|
|
return false, err
|
|
}
|
|
// We will always write "." and "..", so ignore those two.
|
|
if ser.Written() > 2 {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// Rename renames from a *ramfs.Dir to another *ramfs.Dir.
|
|
func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, newParent fs.InodeOperations, newName string, replacement bool) error {
|
|
op, ok := oldParent.(*Dir)
|
|
if !ok {
|
|
return syserror.EXDEV
|
|
}
|
|
np, ok := newParent.(*Dir)
|
|
if !ok {
|
|
return syserror.EXDEV
|
|
}
|
|
if len(newName) > linux.NAME_MAX {
|
|
return syserror.ENAMETOOLONG
|
|
}
|
|
|
|
np.mu.Lock()
|
|
defer np.mu.Unlock()
|
|
|
|
// Is this is an overwriting rename?
|
|
if replacement {
|
|
replaced, ok := np.children[newName]
|
|
if !ok {
|
|
panic(fmt.Sprintf("Dirent claims rename is replacement, but %q is missing from %+v", newName, np))
|
|
}
|
|
|
|
// Non-empty directories cannot be replaced.
|
|
if fs.IsDir(replaced.StableAttr) {
|
|
if ok, err := hasChildren(ctx, replaced); err != nil {
|
|
return err
|
|
} else if ok {
|
|
return syserror.ENOTEMPTY
|
|
}
|
|
}
|
|
|
|
// Remove the replaced child and drop our reference on it.
|
|
inode, err := np.removeChildLocked(ctx, newName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
inode.DecRef()
|
|
}
|
|
|
|
// Be careful, we may have already grabbed this mutex above.
|
|
if op != np {
|
|
op.mu.Lock()
|
|
defer op.mu.Unlock()
|
|
}
|
|
|
|
// Do the swap.
|
|
n := op.children[oldName]
|
|
op.removeChildLocked(ctx, oldName)
|
|
np.addChildLocked(ctx, newName, n)
|
|
|
|
return nil
|
|
}
|