gvisor/pkg/sentry/fs/gofer/inode.go

583 lines
19 KiB
Go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gofer
import (
"errors"
"sync"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/fd"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/p9"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/device"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations.
//
// It pairs the per-file state (fileState) with the caching layer
// (cachingInodeOps) and a lazily populated readdir cache.
//
// +stateify savable
type inodeOperations struct {
fsutil.InodeNotVirtual `state:"nosave"`
fsutil.InodeNoExtendedAttributes `state:"nosave"`
// fileState implements fsutil.CachedFileObject. It exists
// to break a circular load dependency between inodeOperations
// and cachingInodeOps (below).
fileState *inodeFileState `state:"wait"`
// cachingInodeOps implements memmap.Mappable for inodeOperations.
cachingInodeOps *fsutil.CachingInodeOperations
// readdirMu protects readdirCache and serializes concurrent Readdirs.
readdirMu sync.Mutex `state:"nosave"`
// readdirCache is a cache of readdir results in the form of
// a fs.SortedDentryMap.
//
// Starts out as nil, and is initialized under readdirMu lazily;
// invalidating the cache means setting it to nil.
readdirCache *fs.SortedDentryMap `state:"nosave"`
}
// inodeFileState implements fsutil.CachedFileObject and otherwise fully
// encapsulates state that needs to be manually loaded on restore for
// this file object.
//
// This unfortunate structure exists because fsutil.CachingInodeOperations
// defines afterLoad and therefore cannot be lazily loaded (to break a
// circular load dependency between it and inodeOperations). Even with
// lazy loading, this approach defines the dependencies between objects
// and the expected load behavior more concretely.
//
// +stateify savable
type inodeFileState struct {
// s is common file system state for Gofers.
s *session `state:"wait"`
// MultiDeviceKey consists of:
//
// * Device: file system device from a specific gofer.
// * SecondaryDevice: unique identifier of the attach point.
// * Inode: the inode of this resource, unique per Device.
//
// These fields combined enable consistent hashing of virtual inodes
// on goferDevice.
key device.MultiDeviceKey `state:"nosave"`
// file is the p9 file that contains a single unopened fid.
file contextFile `state:"nosave"`
// sattr caches the stable attributes.
sattr fs.StableAttr `state:"wait"`
// handlesMu protects the below fields.
handlesMu sync.RWMutex `state:"nosave"`
// If readHandles is non-nil, it holds handles that are either read-only or
// read/write. If writeHandles is non-nil, it holds write-only handles if
// writeHandlesRW is false, and read/write handles if writeHandlesRW is
// true.
//
// Once readHandles becomes non-nil, it can't be changed until
// inodeFileState.Release(), because of a defect in the
// fsutil.CachedFileObject interface: there's no way for the caller of
// fsutil.CachedFileObject.FD() to keep the returned FD open, so if we
// racily replace readHandles after inodeFileState.FD() has returned
// readHandles.Host.FD(), fsutil.CachingInodeOperations may use a closed
// FD. writeHandles can be changed if writeHandlesRW is false, since
// inodeFileState.FD() can't return a write-only FD, but can't be changed
// if writeHandlesRW is true for the same reason.
readHandles *handles `state:"nosave"`
writeHandles *handles `state:"nosave"`
writeHandlesRW bool `state:"nosave"`
// loading is acquired when the inodeFileState begins an asynchronous
// load. It is released when the load is complete. Callers that require all
// state to be available should call waitForLoad() to ensure that.
loading sync.Mutex `state:".(struct{})"`
// savedUAttr is only allocated during S/R. It points to the save-time
// unstable attributes and is used to validate restore-time ones.
//
// Note that these unstable attributes are only used to detect cross-S/R
// external file system metadata changes. They may differ from the
// cached unstable attributes in cachingInodeOps, as that might differ
// from the external file system attributes if there had been WriteOut
// failures. S/R is transparent to Sentry and the latter will continue
// using its cached values after restore.
savedUAttr *fs.UnstableAttr
// hostMappable is created when using 'cacheRemoteRevalidating' to map pages
// directly from host. It may be nil; users in this file nil-check it
// before handing it out as a memmap.Mappable.
hostMappable *fsutil.HostMappable
}
// Release closes the unopened fid held in i.file and drops the references
// this inodeFileState holds on its shared read and write handles, if any.
func (i *inodeFileState) Release(ctx context.Context) {
	i.file.close(ctx)
	// Each non-nil slot owns exactly one reference, even when both slots
	// point at the same handles object.
	for _, held := range []*handles{i.readHandles, i.writeHandles} {
		if held != nil {
			held.DecRef()
		}
	}
}
// canShareHandles reports whether handles opened for this file may be cached
// on the inodeFileState and shared between open files.
func (i *inodeFileState) canShareHandles() bool {
	// Only regular files share handles: for other file types, distinct
	// handles may have special semantics even if they represent the same
	// file.
	if !fs.IsFile(i.sattr) {
		return false
	}
	// Handle sharing stays disabled for cacheNone, which is legacy behavior.
	return i.s.cachePolicy != cacheNone
}
// setSharedHandlesLocked installs h as the shared read and/or write handles
// where flags allow it, taking one reference on h per slot it fills.
//
// Preconditions: i.handlesMu must be locked for writing.
func (i *inodeFileState) setSharedHandlesLocked(flags fs.FileFlags, h *handles) {
	// readHandles is only ever set once; an existing value is never replaced.
	if flags.Read && i.readHandles == nil {
		h.IncRef()
		i.readHandles = h
	}
	if !flags.Write {
		return
	}

	switch {
	case i.writeHandles == nil:
		// Empty slot: nothing to release before installing h.
	case !i.writeHandlesRW && flags.Read:
		// Upgrade write-only handles to read/write ones by dropping the
		// old reference first.
		i.writeHandles.DecRef()
	default:
		// The existing write handles are already at least as capable.
		return
	}
	h.IncRef()
	i.writeHandles = h
	i.writeHandlesRW = flags.Read
}
// getHandles returns a set of handles for a new file using i opened with the
// given flags.
//
// When handle sharing is not permitted, fresh handles are returned and
// nothing is cached. Otherwise an existing compatible shared set is returned
// with an extra reference, or new handles are opened and cached for future
// callers.
func (i *inodeFileState) getHandles(ctx context.Context, flags fs.FileFlags) (*handles, error) {
	if !i.canShareHandles() {
		return newHandles(ctx, i.file, flags)
	}

	i.handlesMu.Lock()
	defer i.handlesMu.Unlock()

	// Pick the cached set, if any, that can serve the requested access.
	var shared *handles
	switch {
	case !flags.Write:
		shared = i.readHandles
	case i.writeHandlesRW || !flags.Read:
		// Write handles are usable if they are read/write, or if the
		// caller does not need to read.
		shared = i.writeHandles
	}
	if shared != nil {
		shared.IncRef()
		return shared, nil
	}

	// Nothing usable is cached; open new handles and remember them.
	fresh, err := newHandles(ctx, i.file, flags)
	if err != nil {
		return nil, err
	}
	i.setSharedHandlesLocked(flags, fresh)
	return fresh, nil
}
// ReadToBlocksAt implements fsutil.CachedFileObject.ReadToBlocksAt.
//
// The read is issued through the shared read handles at the given offset
// while holding handlesMu for reading.
func (i *inodeFileState) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()

	reader := i.readHandles.readWriterAt(ctx, int64(offset))
	return reader.ReadToBlocks(dsts)
}
// WriteFromBlocksAt implements fsutil.CachedFileObject.WriteFromBlocksAt.
//
// The write is issued through the shared write handles at the given offset
// while holding handlesMu for reading.
func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()

	writer := i.writeHandles.readWriterAt(ctx, int64(offset))
	return writer.WriteFromBlocks(srcs)
}
// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
//
// The attributes selected by mask are sent to the gofer with a single
// setAttr request, unless skipSetAttr decides the update is unnecessary.
func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error {
	if i.skipSetAttr(mask) {
		return nil
	}

	atimeSec, atimeNsec := attr.AccessTime.Unix()
	mtimeSec, mtimeNsec := attr.ModificationTime.Unix()

	// An update of status change time is implied by mask.AccessTime
	// or mask.ModificationTime. Updating status change time to a
	// time earlier than the system time is not possible.
	valid := p9.SetAttrMask{
		Permissions:        mask.Perms,
		Size:               mask.Size,
		UID:                mask.UID,
		GID:                mask.GID,
		ATime:              mask.AccessTime,
		ATimeNotSystemTime: true,
		MTime:              mask.ModificationTime,
		MTimeNotSystemTime: true,
	}
	values := p9.SetAttr{
		Permissions:      p9.FileMode(attr.Perms.LinuxMode()),
		UID:              p9.UID(attr.Owner.UID),
		GID:              p9.GID(attr.Owner.GID),
		Size:             uint64(attr.Size),
		ATimeSeconds:     uint64(atimeSec),
		ATimeNanoSeconds: uint64(atimeNsec),
		MTimeSeconds:     uint64(mtimeSec),
		MTimeNanoSeconds: uint64(mtimeNsec),
	}
	return i.file.setAttr(ctx, valid, values)
}
// skipSetAttr reports whether an attribute change described by mask can be
// skipped. It can be skipped when:
//   - Mask is empty
//   - Mask contains only attributes that cannot be set in the gofer
//   - Mask contains only atime and/or mtime, and a host FD exists
//
// Updates to atime and mtime can be skipped because the cached value will be
// "close enough" to the host value, given that the operation went directly to
// the host FD. Skipping atime updates is particularly important to reduce the
// number of operations sent to the Gofer for readonly files.
func (i *inodeFileState) skipSetAttr(mask fs.AttrMask) bool {
	// Work on a copy with the attributes that cannot be updated cleared.
	remaining := mask
	remaining.Type = false
	remaining.DeviceID = false
	remaining.InodeID = false
	remaining.BlockSize = false
	remaining.Usage = false
	remaining.Links = false
	if remaining.Empty() {
		return true
	}

	// Anything left besides the two timestamps must be sent to the gofer.
	remaining.AccessTime = false
	remaining.ModificationTime = false
	if !remaining.Empty() {
		return false
	}

	// Only timestamps are being set: skip if either shared handle set is
	// backed by a host FD.
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	for _, h := range []*handles{i.readHandles, i.writeHandles} {
		if h != nil && h.Host != nil {
			return true
		}
	}
	return false
}
// Sync implements fsutil.CachedFileObject.Sync.
//
// It fsyncs through the shared write handles; when there are none it does
// nothing and returns nil.
func (i *inodeFileState) Sync(ctx context.Context) error {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()

	if wh := i.writeHandles; wh != nil {
		return wh.File.fsync(ctx)
	}
	return nil
}
// FD implements fsutil.CachedFileObject.FD.
//
// It returns the host FD of the read/write write handles when available,
// otherwise the host FD of the read handles, and -1 when neither exists.
func (i *inodeFileState) FD() int {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()

	var src *handles
	switch {
	case i.writeHandlesRW && i.writeHandles != nil && i.writeHandles.Host != nil:
		// Write-only handles are never used here; only read/write ones.
		src = i.writeHandles
	case i.readHandles != nil && i.readHandles.Host != nil:
		src = i.readHandles
	default:
		return -1
	}
	return int(src.Host.FD())
}
// waitForLoad makes sure any restore-issued loading is done.
func (i *inodeFileState) waitForLoad() {
// This is not a no-op. The loading mutex is held upon restore until
// all loading actions are done, so acquiring it blocks until then. It is
// released right away because it is only used here as a barrier.
i.loading.Lock()
i.loading.Unlock()
}
// unstableAttr fetches the file's attributes with getattr on i.file and
// converts them to an fs.UnstableAttr using the session's mounter and
// client. On failure it returns a zero fs.UnstableAttr and the error.
func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, error) {
	_, attrMask, rawAttr, err := getattr(ctx, i.file)
	if err != nil {
		return fs.UnstableAttr{}, err
	}
	uattr := unstable(ctx, attrMask, rawAttr, i.s.mounter, i.s.client)
	return uattr, nil
}
// session returns the gofer session stored in this inode's fileState.
func (i *inodeOperations) session() *session {
return i.fileState.s
}
// Release implements fs.InodeOperations.Release.
//
// The caching layer is released synchronously; the fileState is released
// asynchronously.
func (i *inodeOperations) Release(ctx context.Context) {
i.cachingInodeOps.Release()
// Releasing the fileState may make RPCs to the gofer. There is
// no need to wait for those to return, so we can do this
// asynchronously.
//
// We use AsyncWithContext to avoid needing to allocate an extra
// anonymous function on the heap.
fs.AsyncWithContext(ctx, i.fileState.Release)
}
// Mappable implements fs.InodeOperations.Mappable.
//
// It returns cachingInodeOps when the cache policy uses caching inode
// operations for inode, otherwise the hostMappable if one exists, otherwise
// nil.
func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable {
	if i.session().cachePolicy.useCachingInodeOps(inode) {
		return i.cachingInodeOps
	}
	// The explicit nil check matters: returning a nil *fsutil.HostMappable
	// through the interface return type would yield a non-nil interface.
	if hm := i.fileState.hostMappable; hm != nil {
		return hm
	}
	return nil
}
// UnstableAttr implements fs.InodeOperations.UnstableAttr.
//
// Attributes come from the caching layer when the cache policy caches
// unstable attributes for inode; otherwise they are fetched through the
// fileState.
func (i *inodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
	if !i.session().cachePolicy.cacheUAttrs(inode) {
		return i.fileState.unstableAttr(ctx)
	}
	return i.cachingInodeOps.UnstableAttr(ctx, inode)
}
// Check implements fs.InodeOperations.Check.
//
// The decision is delegated entirely to fs.ContextCanAccessFile.
func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
return fs.ContextCanAccessFile(ctx, inode, p)
}
// GetFile implements fs.InodeOperations.GetFile.
//
// Sockets and pipes have dedicated open paths; every other inode type is
// opened through getFileDefault.
func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	inodeType := d.Inode.StableAttr.Type
	if inodeType == fs.Socket {
		return i.getFileSocket(ctx, d, flags)
	}
	if inodeType == fs.Pipe {
		return i.getFilePipe(ctx, d, flags)
	}
	return i.getFileDefault(ctx, d, flags)
}
// getFileSocket connects to the socket behind this inode and wraps the
// resulting connection in a host socket file for d.
//
// Any connect failure is reported as EIO. If wrapping fails, the connection
// is closed before the error is returned.
func (i *inodeOperations) getFileSocket(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	conn, err := i.fileState.file.connect(ctx, p9.AnonymousSocket)
	if err != nil {
		return nil, syscall.EIO
	}

	sockFile, err := host.NewSocketWithDirent(ctx, d, conn, flags)
	if err == nil {
		return sockFile, nil
	}
	conn.Close()
	return nil, err
}
// getFilePipe opens the inode as a host pipe when possible. If the open
// reports errNotHostFile, it falls back to getFileDefault.
func (i *inodeOperations) getFilePipe(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	pipeOps, err := fdpipe.Open(ctx, i, flags)
	switch {
	case err == errNotHostFile:
		// Not backed by a host file; handle it like any other file.
		return i.getFileDefault(ctx, d, flags)
	case err != nil:
		return nil, err
	}
	return fs.NewFile(ctx, d, flags, pipeOps), nil
}
// errNotHostFile indicates that the file is not a host file. NonBlockingOpen
// returns it when an open succeeds without yielding a host file, and
// getFilePipe uses it to fall back to the default open path.
var errNotHostFile = errors.New("not a host file")
// NonBlockingOpen implements fdpipe.NonBlockingOpener for opening host named pipes.
//
// It waits for any restore-time load to finish, clones the inode's fid, and
// opens the clone with flags derived from p. The clone is always closed
// before returning. errNotHostFile is returned when the open succeeds but
// yields no host file.
func (i *inodeOperations) NonBlockingOpen(ctx context.Context, p fs.PermMask) (*fd.FD, error) {
	i.fileState.waitForLoad()

	// Walk with no names to obtain a cloned fid that can be opened.
	_, cloned, err := i.fileState.file.walk(ctx, nil)
	if err != nil {
		log.Warningf("Open Walk failed: %v", err)
		return nil, err
	}
	defer cloned.close(ctx)

	openFlags, err := openFlagsFromPerms(p)
	if err != nil {
		log.Warningf("Open flags %s parsing failed: %v", p, err)
		return nil, err
	}

	hostFile, _, _, err := cloned.open(ctx, openFlags)
	if err == nil && hostFile == nil {
		// No error and no host file: this was never a host file to begin
		// with, and should be treated like a remote file.
		return nil, errNotHostFile
	}
	return hostFile, err
}
// getFileDefault obtains handles for the requested flags from the fileState
// and builds a new gofer file for d on top of them.
func (i *inodeOperations) getFileDefault(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	handleSet, err := i.fileState.getHandles(ctx, flags)
	if err != nil {
		return nil, err
	}
	file := NewFile(ctx, d, d.BaseName(), flags, i, handleSet)
	return file, nil
}
// SetPermissions implements fs.InodeOperations.SetPermissions.
//
// With cached unstable attributes the change goes through the caching
// layer; otherwise a chmod is sent straight to the gofer. It returns true on
// success.
func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, p fs.FilePermissions) bool {
	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.SetPermissions(ctx, inode, p)
	}

	// Execute the chmod.
	err := i.fileState.file.setAttr(
		ctx,
		p9.SetAttrMask{Permissions: true},
		p9.SetAttr{Permissions: p9.FileMode(p.LinuxMode())},
	)
	return err == nil
}
// SetOwner implements fs.InodeOperations.SetOwner.
//
// Only the IDs in owner that are Ok() are changed. When neither is, the call
// returns nil without contacting the gofer.
func (i *inodeOperations) SetOwner(ctx context.Context, inode *fs.Inode, owner fs.FileOwner) error {
	setUID, setGID := owner.UID.Ok(), owner.GID.Ok()
	// Save the roundtrip.
	if !setUID && !setGID {
		return nil
	}

	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.SetOwner(ctx, inode, owner)
	}

	valid := p9.SetAttrMask{UID: setUID, GID: setGID}
	var values p9.SetAttr
	if setUID {
		values.UID = p9.UID(owner.UID)
	}
	if setGID {
		values.GID = p9.GID(owner.GID)
	}
	return i.fileState.file.setAttr(ctx, valid, values)
}
// SetTimestamps implements fs.InodeOperations.SetTimestamps.
//
// Without cached unstable attributes the timestamps are set directly on the
// remote file via utimes; otherwise the caching layer handles the update.
func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
	if !i.session().cachePolicy.cacheUAttrs(inode) {
		return utimes(ctx, i.fileState.file, ts)
	}
	return i.cachingInodeOps.SetTimestamps(ctx, inode, ts)
}
// Truncate implements fs.InodeOperations.Truncate.
//
// The truncation is routed, in order of preference, through:
//   - the caching layer, when the cache policy uses caching inode operations
//     for inode;
//   - the hostMappable, when the policy is cacheRemoteRevalidating and a
//     hostMappable exists;
//   - a plain setAttr(Size) request to the gofer otherwise.
func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length int64) error {
	// This can only be called for files anyway.
	if i.session().cachePolicy.useCachingInodeOps(inode) {
		return i.cachingInodeOps.Truncate(ctx, inode, length)
	}
	// hostMappable may be nil even under cacheRemoteRevalidating. Mappable
	// and configureMMap nil-check it before use; do the same here so a
	// missing hostMappable falls through to the direct setAttr instead of
	// dereferencing a nil pointer.
	if hm := i.fileState.hostMappable; hm != nil && i.session().cachePolicy == cacheRemoteRevalidating {
		return hm.Truncate(ctx, length)
	}
	return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)})
}
// WriteOut implements fs.InodeOperations.WriteOut.
//
// It delegates to the caching layer only when unstable attributes are
// cached for inode; otherwise it returns nil without doing anything.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.WriteOut(ctx, inode)
	}
	return nil
}
// Readlink implements fs.InodeOperations.Readlink.
//
// It returns ENOLINK for inodes that are not symlinks; otherwise the target
// is read from the gofer.
func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
	if fs.IsSymlink(inode.StableAttr) {
		return i.fileState.file.readlink(ctx)
	}
	return "", syscall.ENOLINK
}
// Getlink implements fs.InodeOperations.Getlink.
//
// It never resolves a Dirent itself: symlinks return
// fs.ErrResolveViaReadlink, and any other inode type returns ENOLINK.
func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
	if fs.IsSymlink(i.fileState.sattr) {
		return nil, fs.ErrResolveViaReadlink
	}
	return nil, syserror.ENOLINK
}
// StatFS makes a StatFS request to the gofer and converts the reply into an
// fs.Info. On failure it returns a zero fs.Info and the error.
func (i *inodeOperations) StatFS(ctx context.Context) (fs.Info, error) {
	fsstat, err := i.fileState.file.statFS(ctx)
	if err != nil {
		return fs.Info{}, err
	}

	// If blocks available is non-zero, prefer that over blocks free.
	freeBlocks := fsstat.BlocksFree
	if fsstat.BlocksAvailable != 0 {
		freeBlocks = fsstat.BlocksAvailable
	}

	return fs.Info{
		// This is primarily for distinguishing a gofer file system in
		// tests. Testing is important, so instead of defining
		// something completely random, use a standard value.
		Type:        linux.V9FS_MAGIC,
		TotalBlocks: fsstat.Blocks,
		FreeBlocks:  freeBlocks,
		TotalFiles:  fsstat.Files,
		FreeFiles:   fsstat.FilesFree,
	}, nil
}
// configureMMap sets up opts for mapping file, using cachingInodeOps when
// the cache policy selects caching inode operations for the file's inode,
// or the hostMappable when one exists. It returns ENODEV when neither is
// available.
func (i *inodeOperations) configureMMap(file *fs.File, opts *memmap.MMapOpts) error {
	if i.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
		return fsutil.GenericConfigureMMap(file, i.cachingInodeOps, opts)
	}
	if hm := i.fileState.hostMappable; hm != nil {
		return fsutil.GenericConfigureMMap(file, hm, opts)
	}
	return syserror.ENODEV
}
// init registers an error unwrapper with syserror that maps p9.ErrSocket
// values to EIO and declines every other error.
func init() {
	syserror.AddErrorUnwrapper(func(err error) (syscall.Errno, bool) {
		switch err.(type) {
		case p9.ErrSocket:
			// Treat as an I/O error.
			return syscall.EIO, true
		default:
			return 0, false
		}
	})
}
// AddLink implements InodeOperations.AddLink, but is currently a no-op.
// FIXME(b/63117438): Remove this from InodeOperations altogether.
func (*inodeOperations) AddLink() {}
// DropLink implements InodeOperations.DropLink, but is currently a no-op.
// FIXME(b/63117438): Remove this from InodeOperations altogether.
func (*inodeOperations) DropLink() {}
// NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange, but
// is currently a no-op.
// FIXME(b/63117438): Remove this from InodeOperations altogether.
func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}