462 lines
15 KiB
Go
462 lines
15 KiB
Go
// Copyright 2018 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package fs
|
|
|
|
import (
|
|
"sync"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
"gvisor.dev/gvisor/pkg/log"
|
|
"gvisor.dev/gvisor/pkg/metric"
|
|
"gvisor.dev/gvisor/pkg/refs"
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
|
"gvisor.dev/gvisor/pkg/sentry/memmap"
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
)
|
|
|
|
// opens is the "/fs/opens" metric. Inode.GetFile increments it each time a
// file is opened through the InodeOperations of a non-overlay Inode.
var opens = metric.MustCreateNewUint64Metric(
	"/fs/opens",
	false, /* sync */
	"Number of file opens.",
)
|
|
|
|
// Inode is a file system object that can be simultaneously referenced by different
|
|
// components of the VFS (Dirent, fs.File, etc).
|
|
//
|
|
// +stateify savable
|
|
type Inode struct {
|
|
// AtomicRefCount is our reference count.
|
|
refs.AtomicRefCount
|
|
|
|
// InodeOperations is the file system specific behavior of the Inode.
|
|
InodeOperations InodeOperations
|
|
|
|
// StableAttr are stable cached attributes of the Inode.
|
|
StableAttr StableAttr
|
|
|
|
// LockCtx is the file lock context. It manages its own sychronization and tracks
|
|
// regions of the Inode that have locks held.
|
|
LockCtx LockCtx
|
|
|
|
// Watches is the set of inotify watches for this inode.
|
|
Watches *Watches
|
|
|
|
// MountSource is the mount source this Inode is a part of.
|
|
MountSource *MountSource
|
|
|
|
// overlay is the overlay entry for this Inode.
|
|
overlay *overlayEntry
|
|
|
|
// appendMu is used to synchronize write operations into files which
|
|
// have been opened with O_APPEND. Operations which change a file size
|
|
// have to take this lock for read. Write operations to files with
|
|
// O_APPEND have to take this lock for write.
|
|
appendMu sync.RWMutex `state:"nosave"`
|
|
}
|
|
|
|
// LockCtx is an Inode's lock context and contains different personalities of locks; both
|
|
// Posix and BSD style locks are supported.
|
|
//
|
|
// Note that in Linux fcntl(2) and flock(2) locks are _not_ cooperative, because race and
|
|
// deadlock conditions make merging them prohibitive. We do the same and keep them oblivious
|
|
// to each other but provide a "context" as a convenient container.
|
|
//
|
|
// +stateify savable
|
|
type LockCtx struct {
|
|
// Posix is a set of POSIX-style regional advisory locks, see fcntl(2).
|
|
Posix lock.Locks
|
|
|
|
// BSD is a set of BSD-style advisory file wide locks, see flock(2).
|
|
BSD lock.Locks
|
|
}
|
|
|
|
// NewInode constructs an Inode from InodeOperations, a MountSource, and stable attributes.
|
|
//
|
|
// NewInode takes a reference on msrc.
|
|
func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode {
|
|
msrc.IncRef()
|
|
i := Inode{
|
|
InodeOperations: iops,
|
|
StableAttr: sattr,
|
|
Watches: newWatches(),
|
|
MountSource: msrc,
|
|
}
|
|
i.EnableLeakCheck("fs.Inode")
|
|
return &i
|
|
}
|
|
|
|
// DecRef drops a reference on the Inode.
|
|
func (i *Inode) DecRef() {
|
|
i.DecRefWithDestructor(i.destroy)
|
|
}
|
|
|
|
// destroy releases the Inode and releases the msrc reference taken.
|
|
func (i *Inode) destroy() {
|
|
// FIXME(b/38173783): Context is not plumbed here.
|
|
ctx := context.Background()
|
|
if err := i.WriteOut(ctx); err != nil {
|
|
// FIXME(b/65209558): Mark as warning again once noatime is
|
|
// properly supported.
|
|
log.Debugf("Inode %+v, failed to sync all metadata: %v", i.StableAttr, err)
|
|
}
|
|
|
|
// If this inode is being destroyed because it was unlinked, queue a
|
|
// deletion event. This may not be the case for inodes being revalidated.
|
|
if i.Watches.unlinked {
|
|
i.Watches.Notify("", linux.IN_DELETE_SELF, 0)
|
|
}
|
|
|
|
// Remove references from the watch owners to the watches on this inode,
|
|
// since the watches are about to be GCed. Note that we don't need to worry
|
|
// about the watch pins since if there were any active pins, this inode
|
|
// wouldn't be in the destructor.
|
|
i.Watches.targetDestroyed()
|
|
|
|
if i.overlay != nil {
|
|
i.overlay.release()
|
|
} else {
|
|
i.InodeOperations.Release(ctx)
|
|
}
|
|
|
|
i.MountSource.DecRef()
|
|
}
|
|
|
|
// Mappable calls i.InodeOperations.Mappable.
|
|
func (i *Inode) Mappable() memmap.Mappable {
|
|
if i.overlay != nil {
|
|
// In an overlay, Mappable is always implemented by
|
|
// the overlayEntry metadata to synchronize memory
|
|
// access of files with copy up. But first check if
|
|
// the Inodes involved would be mappable in the first
|
|
// place.
|
|
i.overlay.copyMu.RLock()
|
|
ok := i.overlay.isMappableLocked()
|
|
i.overlay.copyMu.RUnlock()
|
|
if !ok {
|
|
return nil
|
|
}
|
|
return i.overlay
|
|
}
|
|
return i.InodeOperations.Mappable(i)
|
|
}
|
|
|
|
// WriteOut calls i.InodeOperations.WriteOut with i as the Inode.
|
|
func (i *Inode) WriteOut(ctx context.Context) error {
|
|
if i.overlay != nil {
|
|
return overlayWriteOut(ctx, i.overlay)
|
|
}
|
|
return i.InodeOperations.WriteOut(ctx, i)
|
|
}
|
|
|
|
// Lookup calls i.InodeOperations.Lookup with i as the directory.
|
|
func (i *Inode) Lookup(ctx context.Context, name string) (*Dirent, error) {
|
|
if i.overlay != nil {
|
|
d, _, err := overlayLookup(ctx, i.overlay, i, name)
|
|
return d, err
|
|
}
|
|
return i.InodeOperations.Lookup(ctx, i, name)
|
|
}
|
|
|
|
// Create calls i.InodeOperations.Create with i as the directory.
|
|
func (i *Inode) Create(ctx context.Context, d *Dirent, name string, flags FileFlags, perm FilePermissions) (*File, error) {
|
|
if i.overlay != nil {
|
|
return overlayCreate(ctx, i.overlay, d, name, flags, perm)
|
|
}
|
|
return i.InodeOperations.Create(ctx, i, name, flags, perm)
|
|
}
|
|
|
|
// CreateDirectory calls i.InodeOperations.CreateDirectory with i as the directory.
|
|
func (i *Inode) CreateDirectory(ctx context.Context, d *Dirent, name string, perm FilePermissions) error {
|
|
if i.overlay != nil {
|
|
return overlayCreateDirectory(ctx, i.overlay, d, name, perm)
|
|
}
|
|
return i.InodeOperations.CreateDirectory(ctx, i, name, perm)
|
|
}
|
|
|
|
// CreateLink calls i.InodeOperations.CreateLink with i as the directory.
|
|
func (i *Inode) CreateLink(ctx context.Context, d *Dirent, oldname string, newname string) error {
|
|
if i.overlay != nil {
|
|
return overlayCreateLink(ctx, i.overlay, d, oldname, newname)
|
|
}
|
|
return i.InodeOperations.CreateLink(ctx, i, oldname, newname)
|
|
}
|
|
|
|
// CreateHardLink calls i.InodeOperations.CreateHardLink with i as the directory.
|
|
func (i *Inode) CreateHardLink(ctx context.Context, d *Dirent, target *Dirent, name string) error {
|
|
if i.overlay != nil {
|
|
return overlayCreateHardLink(ctx, i.overlay, d, target, name)
|
|
}
|
|
return i.InodeOperations.CreateHardLink(ctx, i, target.Inode, name)
|
|
}
|
|
|
|
// CreateFifo calls i.InodeOperations.CreateFifo with i as the directory.
|
|
func (i *Inode) CreateFifo(ctx context.Context, d *Dirent, name string, perm FilePermissions) error {
|
|
if i.overlay != nil {
|
|
return overlayCreateFifo(ctx, i.overlay, d, name, perm)
|
|
}
|
|
return i.InodeOperations.CreateFifo(ctx, i, name, perm)
|
|
}
|
|
|
|
// Remove calls i.InodeOperations.Remove/RemoveDirectory with i as the directory.
|
|
func (i *Inode) Remove(ctx context.Context, d *Dirent, remove *Dirent) error {
|
|
if i.overlay != nil {
|
|
return overlayRemove(ctx, i.overlay, d, remove)
|
|
}
|
|
switch remove.Inode.StableAttr.Type {
|
|
case Directory, SpecialDirectory:
|
|
return i.InodeOperations.RemoveDirectory(ctx, i, remove.name)
|
|
default:
|
|
return i.InodeOperations.Remove(ctx, i, remove.name)
|
|
}
|
|
}
|
|
|
|
// Rename calls i.InodeOperations.Rename with the given arguments.
|
|
func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, newParent *Dirent, newName string, replacement bool) error {
|
|
if i.overlay != nil {
|
|
return overlayRename(ctx, i.overlay, oldParent, renamed, newParent, newName, replacement)
|
|
}
|
|
return i.InodeOperations.Rename(ctx, renamed.Inode, oldParent.Inode, renamed.name, newParent.Inode, newName, replacement)
|
|
}
|
|
|
|
// Bind calls i.InodeOperations.Bind with i as the directory.
|
|
func (i *Inode) Bind(ctx context.Context, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) {
|
|
if i.overlay != nil {
|
|
return overlayBind(ctx, i.overlay, parent, name, data, perm)
|
|
}
|
|
return i.InodeOperations.Bind(ctx, i, name, data, perm)
|
|
}
|
|
|
|
// BoundEndpoint calls i.InodeOperations.BoundEndpoint with i as the Inode.
|
|
func (i *Inode) BoundEndpoint(path string) transport.BoundEndpoint {
|
|
if i.overlay != nil {
|
|
return overlayBoundEndpoint(i.overlay, path)
|
|
}
|
|
return i.InodeOperations.BoundEndpoint(i, path)
|
|
}
|
|
|
|
// GetFile calls i.InodeOperations.GetFile with the given arguments.
|
|
func (i *Inode) GetFile(ctx context.Context, d *Dirent, flags FileFlags) (*File, error) {
|
|
if i.overlay != nil {
|
|
return overlayGetFile(ctx, i.overlay, d, flags)
|
|
}
|
|
opens.Increment()
|
|
return i.InodeOperations.GetFile(ctx, d, flags)
|
|
}
|
|
|
|
// UnstableAttr calls i.InodeOperations.UnstableAttr with i as the Inode.
|
|
func (i *Inode) UnstableAttr(ctx context.Context) (UnstableAttr, error) {
|
|
if i.overlay != nil {
|
|
return overlayUnstableAttr(ctx, i.overlay)
|
|
}
|
|
return i.InodeOperations.UnstableAttr(ctx, i)
|
|
}
|
|
|
|
// Getxattr calls i.InodeOperations.Getxattr with i as the Inode.
|
|
func (i *Inode) Getxattr(name string) (string, error) {
|
|
if i.overlay != nil {
|
|
return overlayGetxattr(i.overlay, name)
|
|
}
|
|
return i.InodeOperations.Getxattr(i, name)
|
|
}
|
|
|
|
// Listxattr calls i.InodeOperations.Listxattr with i as the Inode.
|
|
func (i *Inode) Listxattr() (map[string]struct{}, error) {
|
|
if i.overlay != nil {
|
|
return overlayListxattr(i.overlay)
|
|
}
|
|
return i.InodeOperations.Listxattr(i)
|
|
}
|
|
|
|
// CheckPermission will check if the caller may access this file in the
|
|
// requested way for reading, writing, or executing.
|
|
//
|
|
// CheckPermission is like Linux's fs/namei.c:inode_permission. It
|
|
// - checks file system mount flags,
|
|
// - and utilizes InodeOperations.Check to check capabilities and modes.
|
|
func (i *Inode) CheckPermission(ctx context.Context, p PermMask) error {
|
|
// First check the outer-most mounted filesystem.
|
|
if p.Write && i.MountSource.Flags.ReadOnly {
|
|
return syserror.EROFS
|
|
}
|
|
|
|
if i.overlay != nil {
|
|
// CheckPermission requires some special handling for
|
|
// an overlay.
|
|
//
|
|
// Writes will always be redirected to an upper filesystem,
|
|
// so ignore all lower layers being read-only.
|
|
//
|
|
// But still honor the upper-most filesystem's mount flags;
|
|
// we should not attempt to modify the writable layer if it
|
|
// is mounted read-only.
|
|
if p.Write && overlayUpperMountSource(i.MountSource).Flags.ReadOnly {
|
|
return syserror.EROFS
|
|
}
|
|
}
|
|
|
|
return i.check(ctx, p)
|
|
}
|
|
|
|
func (i *Inode) check(ctx context.Context, p PermMask) error {
|
|
if i.overlay != nil {
|
|
return overlayCheck(ctx, i.overlay, p)
|
|
}
|
|
if !i.InodeOperations.Check(ctx, i, p) {
|
|
return syserror.EACCES
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetPermissions calls i.InodeOperations.SetPermissions with i as the Inode.
|
|
func (i *Inode) SetPermissions(ctx context.Context, d *Dirent, f FilePermissions) bool {
|
|
if i.overlay != nil {
|
|
return overlaySetPermissions(ctx, i.overlay, d, f)
|
|
}
|
|
return i.InodeOperations.SetPermissions(ctx, i, f)
|
|
}
|
|
|
|
// SetOwner calls i.InodeOperations.SetOwner with i as the Inode.
|
|
func (i *Inode) SetOwner(ctx context.Context, d *Dirent, o FileOwner) error {
|
|
if i.overlay != nil {
|
|
return overlaySetOwner(ctx, i.overlay, d, o)
|
|
}
|
|
return i.InodeOperations.SetOwner(ctx, i, o)
|
|
}
|
|
|
|
// SetTimestamps calls i.InodeOperations.SetTimestamps with i as the Inode.
|
|
func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error {
|
|
if i.overlay != nil {
|
|
return overlaySetTimestamps(ctx, i.overlay, d, ts)
|
|
}
|
|
return i.InodeOperations.SetTimestamps(ctx, i, ts)
|
|
}
|
|
|
|
// Truncate calls i.InodeOperations.Truncate with i as the Inode.
|
|
func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
|
|
if i.overlay != nil {
|
|
return overlayTruncate(ctx, i.overlay, d, size)
|
|
}
|
|
i.appendMu.RLock()
|
|
defer i.appendMu.RUnlock()
|
|
return i.InodeOperations.Truncate(ctx, i, size)
|
|
}
|
|
|
|
func (i *Inode) Allocate(ctx context.Context, d *Dirent, offset int64, length int64) error {
|
|
if i.overlay != nil {
|
|
return overlayAllocate(ctx, i.overlay, d, offset, length)
|
|
}
|
|
return i.InodeOperations.Allocate(ctx, i, offset, length)
|
|
}
|
|
|
|
// Readlink calls i.InodeOperations.Readlnk with i as the Inode.
|
|
func (i *Inode) Readlink(ctx context.Context) (string, error) {
|
|
if i.overlay != nil {
|
|
return overlayReadlink(ctx, i.overlay)
|
|
}
|
|
return i.InodeOperations.Readlink(ctx, i)
|
|
}
|
|
|
|
// Getlink calls i.InodeOperations.Getlink.
|
|
func (i *Inode) Getlink(ctx context.Context) (*Dirent, error) {
|
|
if i.overlay != nil {
|
|
return overlayGetlink(ctx, i.overlay)
|
|
}
|
|
return i.InodeOperations.Getlink(ctx, i)
|
|
}
|
|
|
|
// AddLink calls i.InodeOperations.AddLink.
|
|
func (i *Inode) AddLink() {
|
|
if i.overlay != nil {
|
|
// FIXME(b/63117438): Remove this from InodeOperations altogether.
|
|
//
|
|
// This interface is only used by ramfs to update metadata of
|
|
// children. These filesystems should _never_ have overlay
|
|
// Inodes cached as children. So explicitly disallow this
|
|
// scenario and avoid plumbing Dirents through to do copy up.
|
|
panic("overlay Inodes cached in ramfs directories are not supported")
|
|
}
|
|
i.InodeOperations.AddLink()
|
|
}
|
|
|
|
// DropLink calls i.InodeOperations.DropLink.
|
|
func (i *Inode) DropLink() {
|
|
if i.overlay != nil {
|
|
// Same as AddLink.
|
|
panic("overlay Inodes cached in ramfs directories are not supported")
|
|
}
|
|
i.InodeOperations.DropLink()
|
|
}
|
|
|
|
// IsVirtual calls i.InodeOperations.IsVirtual.
|
|
func (i *Inode) IsVirtual() bool {
|
|
if i.overlay != nil {
|
|
// An overlay configuration does not support virtual files.
|
|
return false
|
|
}
|
|
return i.InodeOperations.IsVirtual()
|
|
}
|
|
|
|
// StatFS calls i.InodeOperations.StatFS.
|
|
func (i *Inode) StatFS(ctx context.Context) (Info, error) {
|
|
if i.overlay != nil {
|
|
return overlayStatFS(ctx, i.overlay)
|
|
}
|
|
return i.InodeOperations.StatFS(ctx)
|
|
}
|
|
|
|
// CheckOwnership checks whether `ctx` owns this Inode or may act as its owner.
|
|
// Compare Linux's fs/inode.c:inode_owner_or_capable().
|
|
func (i *Inode) CheckOwnership(ctx context.Context) bool {
|
|
uattr, err := i.UnstableAttr(ctx)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
creds := auth.CredentialsFromContext(ctx)
|
|
if uattr.Owner.UID == creds.EffectiveKUID {
|
|
return true
|
|
}
|
|
if creds.HasCapability(linux.CAP_FOWNER) && creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// CheckCapability checks whether `ctx` has capability `cp` with respect to
|
|
// operations on this Inode.
|
|
//
|
|
// Compare Linux's kernel/capability.c:capable_wrt_inode_uidgid().
|
|
func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
|
|
uattr, err := i.UnstableAttr(ctx)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
creds := auth.CredentialsFromContext(ctx)
|
|
if !creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() {
|
|
return false
|
|
}
|
|
if !creds.UserNamespace.MapFromKGID(uattr.Owner.GID).Ok() {
|
|
return false
|
|
}
|
|
return creds.HasCapability(cp)
|
|
}
|
|
|
|
func (i *Inode) lockAppendMu(appendMode bool) func() {
|
|
if appendMode {
|
|
i.appendMu.Lock()
|
|
return i.appendMu.Unlock
|
|
}
|
|
i.appendMu.RLock()
|
|
return i.appendMu.RUnlock
|
|
}
|