2019-07-18 22:09:14 +00:00
|
|
|
// Copyright 2019 The gVisor Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package vfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync/atomic"
|
|
|
|
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/context"
|
2019-07-18 22:09:14 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/arch"
|
2020-01-29 19:15:59 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
|
2019-12-20 19:52:24 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
2019-07-18 22:09:14 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/memmap"
|
2020-01-28 21:10:41 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sync"
|
2019-12-18 23:47:24 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/usermem"
|
2019-07-18 22:09:14 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/waiter"
|
|
|
|
)
|
|
|
|
|
|
|
|
// A FileDescription represents an open file description, which is the entity
|
|
|
|
// referred to by a file descriptor (POSIX.1-2017 3.258 "Open File
|
|
|
|
// Description").
|
|
|
|
//
|
|
|
|
// FileDescriptions are reference-counted. Unless otherwise specified, all
|
|
|
|
// FileDescription methods require that a reference is held.
|
|
|
|
//
|
|
|
|
// FileDescription is analogous to Linux's struct file.
|
|
|
|
type FileDescription struct {
|
|
|
|
// refs is the reference count. refs is accessed using atomic memory
|
|
|
|
// operations.
|
|
|
|
refs int64
|
|
|
|
|
2019-12-20 19:52:24 +00:00
|
|
|
// statusFlags contains status flags, "initialized by open(2) and possibly
|
|
|
|
// modified by fcntl()" - fcntl(2). statusFlags is accessed using atomic
|
|
|
|
// memory operations.
|
|
|
|
statusFlags uint32
|
|
|
|
|
2020-01-28 21:10:41 +00:00
|
|
|
// epolls is the set of epollInterests registered for this FileDescription.
|
|
|
|
// epolls is protected by epollMu.
|
|
|
|
epollMu sync.Mutex
|
|
|
|
epolls map[*epollInterest]struct{}
|
|
|
|
|
2019-07-18 22:09:14 +00:00
|
|
|
// vd is the filesystem location at which this FileDescription was opened.
|
|
|
|
// A reference is held on vd. vd is immutable.
|
|
|
|
vd VirtualDentry
|
|
|
|
|
2020-01-22 20:27:16 +00:00
|
|
|
// opts contains options passed to FileDescription.Init(). opts is
|
|
|
|
// immutable.
|
2019-12-20 19:52:24 +00:00
|
|
|
opts FileDescriptionOptions
|
|
|
|
|
2020-01-22 20:27:16 +00:00
|
|
|
// readable is MayReadFileWithOpenFlags(statusFlags). readable is
|
|
|
|
// immutable.
|
|
|
|
//
|
|
|
|
// readable is analogous to Linux's FMODE_READ.
|
|
|
|
readable bool
|
|
|
|
|
|
|
|
// writable is MayWriteFileWithOpenFlags(statusFlags). If writable is true,
|
|
|
|
// the FileDescription holds a write count on vd.mount. writable is
|
|
|
|
// immutable.
|
|
|
|
//
|
|
|
|
// writable is analogous to Linux's FMODE_WRITE.
|
|
|
|
writable bool
|
|
|
|
|
2020-06-10 01:44:57 +00:00
|
|
|
usedLockBSD uint32
|
|
|
|
|
2019-07-18 22:09:14 +00:00
|
|
|
// impl is the FileDescriptionImpl associated with this Filesystem. impl is
|
|
|
|
// immutable. This should be the last field in FileDescription.
|
|
|
|
impl FileDescriptionImpl
|
|
|
|
}
|
|
|
|
|
2019-12-20 19:52:24 +00:00
|
|
|
// FileDescriptionOptions groups the optional behaviors that a caller can
// select when calling FileDescription.Init().
type FileDescriptionOptions struct {
	// AllowDirectIO permits O_DIRECT to be set on the file. It is normally
	// enabled only when O_DIRECT would actually change behavior.
	AllowDirectIO bool

	// DenyPRead makes calls to FileDescription.PRead() return ESPIPE.
	DenyPRead bool

	// DenyPWrite makes calls to FileDescription.PWrite() return ESPIPE.
	DenyPWrite bool

	// UseDentryMetadata routes the FileDescription methods that deal with
	// file and filesystem metadata (Stat, SetStat, StatFS, Listxattr,
	// Getxattr, Setxattr, Removexattr) to the matching FilesystemImpl methods
	// rather than to the matching FileDescriptionImpl methods.
	//
	// It exists for file descriptions whose implementation lives outside any
	// individual filesystem, for example pipes, sockets, and device special
	// files. Such FileDescriptions may embed
	// DentryMetadataFileDescriptionImpl to pick up suitable implementations
	// of the FileDescriptionImpl methods that should never be called.
	UseDentryMetadata bool
}
|
|
|
|
|
Add //pkg/sentry/fsimpl/overlay.
Major differences from existing overlay filesystems:
- Linux allows lower layers in an overlay to require revalidation, but not the
upper layer. VFS1 allows the upper layer in an overlay to require
revalidation, but not the lower layer. VFS2 does not allow any layers to
require revalidation. (Now that vfs.MkdirOptions.ForSyntheticMountpoint
exists, no uses of overlay in VFS1 are believed to require upper layer
revalidation; in particular, the requirement that the upper layer support the
creation of "trusted." extended attributes for whiteouts effectively required
the upper filesystem to be tmpfs in most cases.)
- Like VFS1, but unlike Linux, VFS2 overlay does not attempt to make mutations
of the upper layer atomic using a working directory and features like
RENAME_WHITEOUT. (This may change in the future, since not having a working
directory makes error recovery for some operations, e.g. rmdir, particularly
painful.)
- Like Linux, but unlike VFS1, VFS2 represents whiteouts using character
devices with rdev == 0; the equivalent of the whiteout attribute on
directories is xattr trusted.overlay.opaque = "y"; and there is no equivalent
to the whiteout attribute on non-directories since non-directories are never
merged with lower layers.
- Device and inode numbers work as follows:
- In Linux, modulo the xino feature and a special case for when all layers
are the same filesystem:
- Directories use the overlay filesystem's device number and an
ephemeral inode number assigned by the overlay.
- Non-directories that have been copied up use the device and inode
number assigned by the upper filesystem.
- Non-directories that have not been copied up use a per-(overlay,
layer)-pair device number and the inode number assigned by the lower
filesystem.
- In VFS1, device and inode numbers always come from the lower layer unless
"whited out"; this has the adverse effect of requiring interaction with
the lower filesystem even for non-directory files that exist on the upper
layer.
- In VFS2, device and inode numbers are assigned as in Linux, except that
xino and the samefs special case are not supported.
- Like Linux, but unlike VFS1, VFS2 does not attempt to maintain memory mapping
coherence across copy-up. (This may have to change in the future, as users
may be dependent on this property.)
- Like Linux, but unlike VFS1, VFS2 uses the overlayfs mounter's credentials
when interacting with the overlay's layers, rather than the caller's.
- Like Linux, but unlike VFS1, VFS2 permits multiple lower layers in an
overlay.
- Like Linux, but unlike VFS1, VFS2's overlay filesystem is
application-mountable.
Updates #1199
PiperOrigin-RevId: 316019067
2020-06-12 01:33:35 +00:00
|
|
|
// FileCreationFlags are the set of flags passed to FileDescription.Init() but
|
|
|
|
// omitted from FileDescription.StatusFlags().
|
|
|
|
const FileCreationFlags = linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC
|
|
|
|
|
2020-01-22 20:27:16 +00:00
|
|
|
// Init must be called before first use of fd. If it succeeds, it takes
|
2020-04-21 23:30:26 +00:00
|
|
|
// references on mnt and d. flags is the initial file description flags, which
|
|
|
|
// is usually the full set of flags passed to open(2).
|
|
|
|
func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error {
|
|
|
|
writable := MayWriteFileWithOpenFlags(flags)
|
2020-01-22 20:27:16 +00:00
|
|
|
if writable {
|
|
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-18 22:09:14 +00:00
|
|
|
fd.refs = 1
|
2020-04-21 23:30:26 +00:00
|
|
|
|
|
|
|
// Remove "file creation flags" to mirror the behavior from file.f_flags in
|
Add //pkg/sentry/fsimpl/overlay.
Major differences from existing overlay filesystems:
- Linux allows lower layers in an overlay to require revalidation, but not the
upper layer. VFS1 allows the upper layer in an overlay to require
revalidation, but not the lower layer. VFS2 does not allow any layers to
require revalidation. (Now that vfs.MkdirOptions.ForSyntheticMountpoint
exists, no uses of overlay in VFS1 are believed to require upper layer
revalidation; in particular, the requirement that the upper layer support the
creation of "trusted." extended attributes for whiteouts effectively required
the upper filesystem to be tmpfs in most cases.)
- Like VFS1, but unlike Linux, VFS2 overlay does not attempt to make mutations
of the upper layer atomic using a working directory and features like
RENAME_WHITEOUT. (This may change in the future, since not having a working
directory makes error recovery for some operations, e.g. rmdir, particularly
painful.)
- Like Linux, but unlike VFS1, VFS2 represents whiteouts using character
devices with rdev == 0; the equivalent of the whiteout attribute on
directories is xattr trusted.overlay.opaque = "y"; and there is no equivalent
to the whiteout attribute on non-directories since non-directories are never
merged with lower layers.
- Device and inode numbers work as follows:
- In Linux, modulo the xino feature and a special case for when all layers
are the same filesystem:
- Directories use the overlay filesystem's device number and an
ephemeral inode number assigned by the overlay.
- Non-directories that have been copied up use the device and inode
number assigned by the upper filesystem.
- Non-directories that have not been copied up use a per-(overlay,
layer)-pair device number and the inode number assigned by the lower
filesystem.
- In VFS1, device and inode numbers always come from the lower layer unless
"whited out"; this has the adverse effect of requiring interaction with
the lower filesystem even for non-directory files that exist on the upper
layer.
- In VFS2, device and inode numbers are assigned as in Linux, except that
xino and the samefs special case are not supported.
- Like Linux, but unlike VFS1, VFS2 does not attempt to maintain memory mapping
coherence across copy-up. (This may have to change in the future, as users
may be dependent on this property.)
- Like Linux, but unlike VFS1, VFS2 uses the overlayfs mounter's credentials
when interacting with the overlay's layers, rather than the caller's.
- Like Linux, but unlike VFS1, VFS2 permits multiple lower layers in an
overlay.
- Like Linux, but unlike VFS1, VFS2's overlay filesystem is
application-mountable.
Updates #1199
PiperOrigin-RevId: 316019067
2020-06-12 01:33:35 +00:00
|
|
|
// fs/open.c:do_dentry_open.
|
|
|
|
fd.statusFlags = flags &^ FileCreationFlags
|
2019-07-18 22:09:14 +00:00
|
|
|
fd.vd = VirtualDentry{
|
|
|
|
mount: mnt,
|
|
|
|
dentry: d,
|
|
|
|
}
|
2020-04-21 19:16:42 +00:00
|
|
|
mnt.IncRef()
|
|
|
|
d.IncRef()
|
2019-12-20 19:52:24 +00:00
|
|
|
fd.opts = *opts
|
2020-04-21 23:30:26 +00:00
|
|
|
fd.readable = MayReadFileWithOpenFlags(flags)
|
2020-01-22 20:27:16 +00:00
|
|
|
fd.writable = writable
|
2019-07-18 22:09:14 +00:00
|
|
|
fd.impl = impl
|
2020-01-22 20:27:16 +00:00
|
|
|
return nil
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// IncRef increments fd's reference count.
|
|
|
|
func (fd *FileDescription) IncRef() {
|
|
|
|
atomic.AddInt64(&fd.refs, 1)
|
|
|
|
}
|
|
|
|
|
Minor VFS2 interface changes.
- Remove the Filesystem argument from DentryImpl.*Ref(); in general DentryImpls
that need the Filesystem for reference counting will probably also need it
for other interface methods that don't plumb Filesystem, so it's easier to
just store a pointer to the filesystem in the DentryImpl.
- Add a pointer to the VirtualFilesystem to Filesystem, which is needed by the
gofer client to disown dentries for cache eviction triggered by dentry
reference count changes.
- Rename FilesystemType.NewFilesystem to GetFilesystem; in some cases (e.g.
sysfs, cgroupfs) it's much cleaner for there to be only one Filesystem that
is used by all mounts, and in at least one case (devtmpfs) it's visibly
incorrect not to do so, so NewFilesystem doesn't always actually create and
return a *new* Filesystem.
- Require callers of FileDescription.Init() to increment Mount/Dentry
references. This is because the gofer client may, in the OpenAt() path, take
a reference on a dentry with 0 references, which is safe due to
synchronization that is outside the scope of this CL, and it would be safer
to still have its implementation of DentryImpl.IncRef() check for an
increment for 0 references in other cases.
- Add FileDescription.TryIncRef. This is used by the gofer client to take
references on "special file descriptions" (FDs for files such as pipes,
sockets, and devices), which use per-FD handles (fids) instead of
dentry-shared handles, for sync() and syncfs().
PiperOrigin-RevId: 282473364
2019-11-26 02:09:15 +00:00
|
|
|
// TryIncRef increments fd's reference count and returns true. If fd's
|
|
|
|
// reference count is already zero, TryIncRef does nothing and returns false.
|
|
|
|
//
|
|
|
|
// TryIncRef does not require that a reference is held on fd.
|
|
|
|
func (fd *FileDescription) TryIncRef() bool {
|
|
|
|
for {
|
|
|
|
refs := atomic.LoadInt64(&fd.refs)
|
|
|
|
if refs <= 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-18 22:09:14 +00:00
|
|
|
// DecRef decrements fd's reference count.
|
|
|
|
func (fd *FileDescription) DecRef() {
|
|
|
|
if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 {
|
2020-01-28 21:10:41 +00:00
|
|
|
// Unregister fd from all epoll instances.
|
|
|
|
fd.epollMu.Lock()
|
|
|
|
epolls := fd.epolls
|
|
|
|
fd.epolls = nil
|
|
|
|
fd.epollMu.Unlock()
|
|
|
|
for epi := range epolls {
|
|
|
|
ep := epi.epoll
|
|
|
|
ep.interestMu.Lock()
|
|
|
|
// Check that epi has not been concurrently unregistered by
|
|
|
|
// EpollInstance.DeleteInterest() or EpollInstance.Release().
|
|
|
|
if _, ok := ep.interest[epi.key]; ok {
|
|
|
|
fd.EventUnregister(&epi.waiter)
|
|
|
|
ep.removeLocked(epi)
|
|
|
|
}
|
|
|
|
ep.interestMu.Unlock()
|
|
|
|
}
|
2020-06-10 01:44:57 +00:00
|
|
|
|
|
|
|
// If BSD locks were used, release any lock that it may have acquired.
|
|
|
|
if atomic.LoadUint32(&fd.usedLockBSD) != 0 {
|
|
|
|
fd.impl.UnlockBSD(context.Background(), fd)
|
|
|
|
}
|
|
|
|
|
2020-01-28 21:10:41 +00:00
|
|
|
// Release implementation resources.
|
2019-07-18 22:09:14 +00:00
|
|
|
fd.impl.Release()
|
2020-01-22 20:27:16 +00:00
|
|
|
if fd.writable {
|
|
|
|
fd.vd.mount.EndWrite()
|
|
|
|
}
|
2019-07-18 22:09:14 +00:00
|
|
|
fd.vd.DecRef()
|
|
|
|
} else if refs < 0 {
|
|
|
|
panic("FileDescription.DecRef() called without holding a reference")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-05 04:01:42 +00:00
|
|
|
// Refs returns the current number of references. The returned count
|
|
|
|
// is inherently racy and is unsafe to use without external synchronization.
|
|
|
|
func (fd *FileDescription) Refs() int64 {
|
|
|
|
return atomic.LoadInt64(&fd.refs)
|
|
|
|
}
|
|
|
|
|
2019-12-20 19:52:24 +00:00
|
|
|
// Mount returns the mount on which fd was opened. It does not take a reference
|
|
|
|
// on the returned Mount.
|
|
|
|
func (fd *FileDescription) Mount() *Mount {
|
|
|
|
return fd.vd.mount
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dentry returns the dentry at which fd was opened. It does not take a
|
|
|
|
// reference on the returned Dentry.
|
|
|
|
func (fd *FileDescription) Dentry() *Dentry {
|
|
|
|
return fd.vd.dentry
|
|
|
|
}
|
|
|
|
|
|
|
|
// VirtualDentry returns the location at which fd was opened. It does not take
|
|
|
|
// a reference on the returned VirtualDentry.
|
|
|
|
func (fd *FileDescription) VirtualDentry() VirtualDentry {
|
|
|
|
return fd.vd
|
|
|
|
}
|
|
|
|
|
2020-05-27 04:42:07 +00:00
|
|
|
// Options returns the options passed to fd.Init().
|
|
|
|
func (fd *FileDescription) Options() FileDescriptionOptions {
|
|
|
|
return fd.opts
|
|
|
|
}
|
|
|
|
|
2019-12-20 19:52:24 +00:00
|
|
|
// StatusFlags returns file description status flags, as for fcntl(F_GETFL).
|
|
|
|
func (fd *FileDescription) StatusFlags() uint32 {
|
|
|
|
return atomic.LoadUint32(&fd.statusFlags)
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetStatusFlags sets file description status flags, as for fcntl(F_SETFL).
|
|
|
|
func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Credentials, flags uint32) error {
|
|
|
|
// Compare Linux's fs/fcntl.c:setfl().
|
|
|
|
oldFlags := fd.StatusFlags()
|
|
|
|
// Linux documents this check as "O_APPEND cannot be cleared if the file is
|
|
|
|
// marked as append-only and the file is open for write", which would make
|
|
|
|
// sense. However, the check as actually implemented seems to be "O_APPEND
|
|
|
|
// cannot be changed if the file is marked as append-only".
|
|
|
|
if (flags^oldFlags)&linux.O_APPEND != 0 {
|
2019-12-30 19:35:06 +00:00
|
|
|
stat, err := fd.Stat(ctx, StatOptions{
|
2019-12-20 19:52:24 +00:00
|
|
|
// There is no mask bit for stx_attributes.
|
|
|
|
Mask: 0,
|
|
|
|
// Linux just reads inode::i_flags directly.
|
|
|
|
Sync: linux.AT_STATX_DONT_SYNC,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if (stat.AttributesMask&linux.STATX_ATTR_APPEND != 0) && (stat.Attributes&linux.STATX_ATTR_APPEND != 0) {
|
|
|
|
return syserror.EPERM
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (flags&linux.O_NOATIME != 0) && (oldFlags&linux.O_NOATIME == 0) {
|
2019-12-30 19:35:06 +00:00
|
|
|
stat, err := fd.Stat(ctx, StatOptions{
|
2019-12-20 19:52:24 +00:00
|
|
|
Mask: linux.STATX_UID,
|
|
|
|
// Linux's inode_owner_or_capable() just reads inode::i_uid
|
|
|
|
// directly.
|
|
|
|
Sync: linux.AT_STATX_DONT_SYNC,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if stat.Mask&linux.STATX_UID == 0 {
|
|
|
|
return syserror.EPERM
|
|
|
|
}
|
|
|
|
if !CanActAsOwner(creds, auth.KUID(stat.UID)) {
|
|
|
|
return syserror.EPERM
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO {
|
|
|
|
return syserror.EINVAL
|
|
|
|
}
|
|
|
|
// TODO(jamieliu): FileDescriptionImpl.SetOAsync()?
|
|
|
|
const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK
|
|
|
|
atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-01-22 20:27:16 +00:00
|
|
|
// IsReadable returns true if fd was opened for reading.
|
|
|
|
func (fd *FileDescription) IsReadable() bool {
|
|
|
|
return fd.readable
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsWritable returns true if fd was opened for writing.
|
|
|
|
func (fd *FileDescription) IsWritable() bool {
|
|
|
|
return fd.writable
|
|
|
|
}
|
|
|
|
|
2019-12-20 19:52:24 +00:00
|
|
|
// Impl returns the FileDescriptionImpl associated with fd.
|
|
|
|
func (fd *FileDescription) Impl() FileDescriptionImpl {
|
|
|
|
return fd.impl
|
|
|
|
}
|
|
|
|
|
2019-07-18 22:09:14 +00:00
|
|
|
// FileDescriptionImpl contains implementation details for an FileDescription.
|
|
|
|
// Implementations of FileDescriptionImpl should contain their associated
|
|
|
|
// FileDescription by value as their first field.
|
|
|
|
//
|
|
|
|
// For all functions that return linux.Statx, Statx.Uid and Statx.Gid will
|
|
|
|
// be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID and
|
|
|
|
// auth.KGID respectively).
|
|
|
|
//
|
2019-12-23 21:17:29 +00:00
|
|
|
// All methods may return errors not specified.
|
|
|
|
//
|
2019-07-18 22:09:14 +00:00
|
|
|
// FileDescriptionImpl is analogous to Linux's struct file_operations.
|
|
|
|
type FileDescriptionImpl interface {
|
|
|
|
// Release is called when the associated FileDescription reaches zero
|
|
|
|
// references.
|
|
|
|
Release()
|
|
|
|
|
|
|
|
// OnClose is called when a file descriptor representing the
|
|
|
|
// FileDescription is closed. Note that returning a non-nil error does not
|
|
|
|
// prevent the file descriptor from being closed.
|
2019-10-16 01:39:16 +00:00
|
|
|
OnClose(ctx context.Context) error
|
2019-07-18 22:09:14 +00:00
|
|
|
|
|
|
|
// Stat returns metadata for the file represented by the FileDescription.
|
|
|
|
Stat(ctx context.Context, opts StatOptions) (linux.Statx, error)
|
|
|
|
|
|
|
|
// SetStat updates metadata for the file represented by the
|
2020-03-16 20:28:00 +00:00
|
|
|
// FileDescription. Implementations are responsible for checking if the
|
|
|
|
// operation can be performed (see vfs.CheckSetStat() for common checks).
|
2019-07-18 22:09:14 +00:00
|
|
|
SetStat(ctx context.Context, opts SetStatOptions) error
|
|
|
|
|
|
|
|
// StatFS returns metadata for the filesystem containing the file
|
|
|
|
// represented by the FileDescription.
|
|
|
|
StatFS(ctx context.Context) (linux.Statfs, error)
|
|
|
|
|
|
|
|
// waiter.Waitable methods may be used to poll for I/O events.
|
|
|
|
waiter.Waitable
|
|
|
|
|
|
|
|
// PRead reads from the file into dst, starting at the given offset, and
|
|
|
|
// returns the number of bytes read. PRead is permitted to return partial
|
|
|
|
// reads with a nil error.
|
2019-12-23 21:17:29 +00:00
|
|
|
//
|
|
|
|
// Errors:
|
|
|
|
//
|
|
|
|
// - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP.
|
2020-01-22 20:27:16 +00:00
|
|
|
//
|
|
|
|
// Preconditions: The FileDescription was opened for reading.
|
2020-04-02 23:57:08 +00:00
|
|
|
// FileDescriptionOptions.DenyPRead == false.
|
2019-07-18 22:09:14 +00:00
|
|
|
PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error)
|
|
|
|
|
|
|
|
// Read is similar to PRead, but does not specify an offset.
|
|
|
|
//
|
|
|
|
// For files with an implicit FileDescription offset (e.g. regular files),
|
|
|
|
// Read begins at the FileDescription offset, and advances the offset by
|
|
|
|
// the number of bytes read; note that POSIX 2.9.7 "Thread Interactions
|
|
|
|
// with Regular File Operations" requires that all operations that may
|
|
|
|
// mutate the FileDescription offset are serialized.
|
2019-12-23 21:17:29 +00:00
|
|
|
//
|
|
|
|
// Errors:
|
|
|
|
//
|
|
|
|
// - If opts.Flags specifies unsupported options, Read returns EOPNOTSUPP.
|
2020-01-22 20:27:16 +00:00
|
|
|
//
|
|
|
|
// Preconditions: The FileDescription was opened for reading.
|
2019-07-18 22:09:14 +00:00
|
|
|
Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error)
|
|
|
|
|
|
|
|
// PWrite writes src to the file, starting at the given offset, and returns
|
|
|
|
// the number of bytes written. PWrite is permitted to return partial
|
|
|
|
// writes with a nil error.
|
|
|
|
//
|
|
|
|
// As in Linux (but not POSIX), if O_APPEND is in effect for the
|
|
|
|
// FileDescription, PWrite should ignore the offset and append data to the
|
|
|
|
// end of the file.
|
2019-12-23 21:17:29 +00:00
|
|
|
//
|
|
|
|
// Errors:
|
|
|
|
//
|
|
|
|
// - If opts.Flags specifies unsupported options, PWrite returns
|
|
|
|
// EOPNOTSUPP.
|
2020-01-22 20:27:16 +00:00
|
|
|
//
|
|
|
|
// Preconditions: The FileDescription was opened for writing.
|
2020-04-02 23:57:08 +00:00
|
|
|
// FileDescriptionOptions.DenyPWrite == false.
|
2019-07-18 22:09:14 +00:00
|
|
|
PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error)
|
|
|
|
|
|
|
|
// Write is similar to PWrite, but does not specify an offset, which is
|
|
|
|
// implied as for Read.
|
|
|
|
//
|
|
|
|
// Write is a FileDescriptionImpl method, instead of a wrapper around
|
|
|
|
// PWrite that uses a FileDescription offset, to make it possible for
|
|
|
|
// remote filesystems to implement O_APPEND correctly (i.e. atomically with
|
|
|
|
// respect to writers outside the scope of VFS).
|
2019-12-23 21:17:29 +00:00
|
|
|
//
|
|
|
|
// Errors:
|
|
|
|
//
|
|
|
|
// - If opts.Flags specifies unsupported options, Write returns EOPNOTSUPP.
|
2020-01-22 20:27:16 +00:00
|
|
|
//
|
|
|
|
// Preconditions: The FileDescription was opened for writing.
|
2019-07-18 22:09:14 +00:00
|
|
|
Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error)
|
|
|
|
|
|
|
|
// IterDirents invokes cb on each entry in the directory represented by the
|
|
|
|
// FileDescription. If IterDirents has been called since the last call to
|
|
|
|
// Seek, it continues iteration from the end of the last call.
|
|
|
|
IterDirents(ctx context.Context, cb IterDirentsCallback) error
|
|
|
|
|
|
|
|
// Seek changes the FileDescription offset (assuming one exists) and
|
|
|
|
// returns its new value.
|
|
|
|
//
|
|
|
|
// For directories, if whence == SEEK_SET and offset == 0, the caller is
|
|
|
|
// rewinddir(), such that Seek "shall also cause the directory stream to
|
|
|
|
// refer to the current state of the corresponding directory" -
|
|
|
|
// POSIX.1-2017.
|
|
|
|
Seek(ctx context.Context, offset int64, whence int32) (int64, error)
|
|
|
|
|
|
|
|
// Sync requests that cached state associated with the file represented by
|
|
|
|
// the FileDescription is synchronized with persistent storage, and blocks
|
|
|
|
// until this is complete.
|
|
|
|
Sync(ctx context.Context) error
|
|
|
|
|
|
|
|
// ConfigureMMap mutates opts to implement mmap(2) for the file. Most
|
|
|
|
// implementations that support memory mapping can call
|
|
|
|
// GenericConfigureMMap with the appropriate memmap.Mappable.
|
2019-10-16 01:39:16 +00:00
|
|
|
ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error
|
2019-07-18 22:09:14 +00:00
|
|
|
|
|
|
|
// Ioctl implements the ioctl(2) syscall.
|
|
|
|
Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error)
|
|
|
|
|
2019-12-18 23:47:24 +00:00
|
|
|
// Listxattr returns all extended attribute names for the file.
|
2020-04-11 02:01:39 +00:00
|
|
|
Listxattr(ctx context.Context, size uint64) ([]string, error)
|
2019-12-18 23:47:24 +00:00
|
|
|
|
|
|
|
// Getxattr returns the value associated with the given extended attribute
|
|
|
|
// for the file.
|
2020-04-11 02:01:39 +00:00
|
|
|
Getxattr(ctx context.Context, opts GetxattrOptions) (string, error)
|
2019-12-18 23:47:24 +00:00
|
|
|
|
|
|
|
// Setxattr changes the value associated with the given extended attribute
|
|
|
|
// for the file.
|
|
|
|
Setxattr(ctx context.Context, opts SetxattrOptions) error
|
|
|
|
|
|
|
|
// Removexattr removes the given extended attribute from the file.
|
|
|
|
Removexattr(ctx context.Context, name string) error
|
|
|
|
|
2020-01-29 19:15:59 +00:00
|
|
|
// LockBSD tries to acquire a BSD-style advisory file lock.
|
|
|
|
LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error
|
|
|
|
|
2020-06-10 01:44:57 +00:00
|
|
|
// UnlockBSD releases a BSD-style advisory file lock.
|
2020-01-29 19:15:59 +00:00
|
|
|
UnlockBSD(ctx context.Context, uid lock.UniqueID) error
|
|
|
|
|
|
|
|
// LockPOSIX tries to acquire a POSIX-style advisory file lock.
|
2020-06-17 17:02:41 +00:00
|
|
|
LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, length uint64, whence int16, block lock.Blocker) error
|
2020-01-29 19:15:59 +00:00
|
|
|
|
|
|
|
// UnlockPOSIX releases a POSIX-style advisory file lock.
|
2020-06-17 17:02:41 +00:00
|
|
|
UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, length uint64, whence int16) error
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Dirent carries the same information as struct linux_dirent64.
type Dirent struct {
	// Name is the name of the directory entry.
	Name string

	// Type is the entry's file type, expressed as a linux.DT_* constant.
	Type uint8

	// Ino is the entry's inode number.
	Ino uint64

	// NextOff is the offset of the Dirent that *follows* this one in the
	// directory. In other words, after FileDescription.Seek(NextOff,
	// SEEK_SET) (as issued by seekdir(3)), the next call to
	// FileDescription.IterDirents() yields the following Dirent. (The first
	// Dirent in a directory is always at offset 0.)
	NextOff int64
}
|
|
|
|
|
|
|
|
// IterDirentsCallback receives Dirents from FileDescriptionImpl.IterDirents.
|
|
|
|
type IterDirentsCallback interface {
|
2020-02-14 22:39:40 +00:00
|
|
|
// Handle handles the given iterated Dirent. If Handle returns a non-nil
|
|
|
|
// error, FileDescriptionImpl.IterDirents must stop iteration and return
|
|
|
|
// the error; the next call to FileDescriptionImpl.IterDirents should
|
|
|
|
// restart with the same Dirent.
|
|
|
|
Handle(dirent Dirent) error
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
|
Add //pkg/sentry/fsimpl/overlay.
Major differences from existing overlay filesystems:
- Linux allows lower layers in an overlay to require revalidation, but not the
upper layer. VFS1 allows the upper layer in an overlay to require
revalidation, but not the lower layer. VFS2 does not allow any layers to
require revalidation. (Now that vfs.MkdirOptions.ForSyntheticMountpoint
exists, no uses of overlay in VFS1 are believed to require upper layer
revalidation; in particular, the requirement that the upper layer support the
creation of "trusted." extended attributes for whiteouts effectively required
the upper filesystem to be tmpfs in most cases.)
- Like VFS1, but unlike Linux, VFS2 overlay does not attempt to make mutations
of the upper layer atomic using a working directory and features like
RENAME_WHITEOUT. (This may change in the future, since not having a working
directory makes error recovery for some operations, e.g. rmdir, particularly
painful.)
- Like Linux, but unlike VFS1, VFS2 represents whiteouts using character
devices with rdev == 0; the equivalent of the whiteout attribute on
directories is xattr trusted.overlay.opaque = "y"; and there is no equivalent
to the whiteout attribute on non-directories since non-directories are never
merged with lower layers.
- Device and inode numbers work as follows:
- In Linux, modulo the xino feature and a special case for when all layers
are the same filesystem:
- Directories use the overlay filesystem's device number and an
ephemeral inode number assigned by the overlay.
- Non-directories that have been copied up use the device and inode
number assigned by the upper filesystem.
- Non-directories that have not been copied up use a per-(overlay,
layer)-pair device number and the inode number assigned by the lower
filesystem.
- In VFS1, device and inode numbers always come from the lower layer unless
"whited out"; this has the adverse effect of requiring interaction with
the lower filesystem even for non-directory files that exist on the upper
layer.
- In VFS2, device and inode numbers are assigned as in Linux, except that
xino and the samefs special case are not supported.
- Like Linux, but unlike VFS1, VFS2 does not attempt to maintain memory mapping
coherence across copy-up. (This may have to change in the future, as users
may be dependent on this property.)
- Like Linux, but unlike VFS1, VFS2 uses the overlayfs mounter's credentials
when interacting with the overlay's layers, rather than the caller's.
- Like Linux, but unlike VFS1, VFS2 permits multiple lower layers in an
overlay.
- Like Linux, but unlike VFS1, VFS2's overlay filesystem is
application-mountable.
Updates #1199
PiperOrigin-RevId: 316019067
2020-06-12 01:33:35 +00:00
|
|
|
// IterDirentsCallbackFunc implements IterDirentsCallback for a function with
|
|
|
|
// the semantics of IterDirentsCallback.Handle.
|
|
|
|
type IterDirentsCallbackFunc func(dirent Dirent) error
|
|
|
|
|
|
|
|
// Handle implements IterDirentsCallback.Handle.
|
|
|
|
func (f IterDirentsCallbackFunc) Handle(dirent Dirent) error {
|
|
|
|
return f(dirent)
|
|
|
|
}
|
|
|
|
|
2019-12-11 02:16:47 +00:00
|
|
|
// OnClose is called when a file descriptor representing the FileDescription is
|
|
|
|
// closed. Returning a non-nil error should not prevent the file descriptor
|
|
|
|
// from being closed.
|
|
|
|
func (fd *FileDescription) OnClose(ctx context.Context) error {
|
|
|
|
return fd.impl.OnClose(ctx)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stat returns metadata for the file represented by fd.
|
|
|
|
func (fd *FileDescription) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
|
|
|
stat, err := fd.vd.mount.fs.impl.StatAt(ctx, rp, opts)
|
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return stat, err
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.Stat(ctx, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetStat updates metadata for the file represented by fd.
|
|
|
|
func (fd *FileDescription) SetStat(ctx context.Context, opts SetStatOptions) error {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
|
|
|
err := fd.vd.mount.fs.impl.SetStatAt(ctx, rp, opts)
|
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return err
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.SetStat(ctx, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// StatFS returns metadata for the filesystem containing the file represented
|
|
|
|
// by fd.
|
|
|
|
func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
|
|
|
statfs, err := fd.vd.mount.fs.impl.StatFSAt(ctx, rp)
|
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return statfs, err
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.StatFS(ctx)
|
|
|
|
}
|
|
|
|
|
2020-01-28 21:10:41 +00:00
|
|
|
// Readiness returns fd's I/O readiness.
|
|
|
|
func (fd *FileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
|
|
|
|
return fd.impl.Readiness(mask)
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventRegister registers e for I/O readiness events in mask.
|
|
|
|
func (fd *FileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
|
|
|
fd.impl.EventRegister(e, mask)
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventUnregister unregisters e for I/O readiness events.
|
|
|
|
func (fd *FileDescription) EventUnregister(e *waiter.Entry) {
|
|
|
|
fd.impl.EventUnregister(e)
|
|
|
|
}
|
|
|
|
|
2019-12-11 02:16:47 +00:00
|
|
|
// PRead reads from the file represented by fd into dst, starting at the given
|
|
|
|
// offset, and returns the number of bytes read. PRead is permitted to return
|
|
|
|
// partial reads with a nil error.
|
|
|
|
func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
|
2020-04-02 23:57:08 +00:00
|
|
|
if fd.opts.DenyPRead {
|
|
|
|
return 0, syserror.ESPIPE
|
|
|
|
}
|
2020-01-22 20:27:16 +00:00
|
|
|
if !fd.readable {
|
|
|
|
return 0, syserror.EBADF
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.PRead(ctx, dst, offset, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read is similar to PRead, but does not specify an offset.
|
|
|
|
func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
|
2020-01-22 20:27:16 +00:00
|
|
|
if !fd.readable {
|
|
|
|
return 0, syserror.EBADF
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.Read(ctx, dst, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// PWrite writes src to the file represented by fd, starting at the given
|
|
|
|
// offset, and returns the number of bytes written. PWrite is permitted to
|
|
|
|
// return partial writes with a nil error.
|
|
|
|
func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
|
2020-04-02 23:57:08 +00:00
|
|
|
if fd.opts.DenyPWrite {
|
|
|
|
return 0, syserror.ESPIPE
|
|
|
|
}
|
2020-01-22 20:27:16 +00:00
|
|
|
if !fd.writable {
|
|
|
|
return 0, syserror.EBADF
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.PWrite(ctx, src, offset, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write is similar to PWrite, but does not specify an offset.
|
|
|
|
func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
|
2020-01-22 20:27:16 +00:00
|
|
|
if !fd.writable {
|
|
|
|
return 0, syserror.EBADF
|
|
|
|
}
|
2019-12-11 02:16:47 +00:00
|
|
|
return fd.impl.Write(ctx, src, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// IterDirents invokes cb on each entry in the directory represented by fd. If
|
|
|
|
// IterDirents has been called since the last call to Seek, it continues
|
|
|
|
// iteration from the end of the last call.
|
|
|
|
func (fd *FileDescription) IterDirents(ctx context.Context, cb IterDirentsCallback) error {
|
|
|
|
return fd.impl.IterDirents(ctx, cb)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Seek changes fd's offset (assuming one exists) and returns its new value.
|
|
|
|
func (fd *FileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
|
|
|
|
return fd.impl.Seek(ctx, offset, whence)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sync has the semantics of fsync(2).
|
|
|
|
func (fd *FileDescription) Sync(ctx context.Context) error {
|
|
|
|
return fd.impl.Sync(ctx)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ConfigureMMap mutates opts to implement mmap(2) for the file represented by
|
|
|
|
// fd.
|
|
|
|
func (fd *FileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
|
|
|
|
return fd.impl.ConfigureMMap(ctx, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ioctl implements the ioctl(2) syscall.
|
|
|
|
func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
|
|
|
|
return fd.impl.Ioctl(ctx, uio, args)
|
|
|
|
}
|
|
|
|
|
2019-12-18 23:47:24 +00:00
|
|
|
// Listxattr returns all extended attribute names for the file represented by
|
|
|
|
// fd.
|
2020-04-11 02:01:39 +00:00
|
|
|
//
|
|
|
|
// If the size of the list (including a NUL terminating byte after every entry)
|
|
|
|
// would exceed size, ERANGE may be returned. Note that implementations
|
|
|
|
// are free to ignore size entirely and return without error). In all cases,
|
|
|
|
// if size is 0, the list should be returned without error, regardless of size.
|
|
|
|
func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
2020-04-11 02:01:39 +00:00
|
|
|
names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size)
|
2019-12-30 19:35:06 +00:00
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return names, err
|
|
|
|
}
|
2020-04-11 02:01:39 +00:00
|
|
|
names, err := fd.impl.Listxattr(ctx, size)
|
2019-12-18 23:47:24 +00:00
|
|
|
if err == syserror.ENOTSUP {
|
|
|
|
// Linux doesn't actually return ENOTSUP in this case; instead,
|
|
|
|
// fs/xattr.c:vfs_listxattr() falls back to allowing the security
|
|
|
|
// subsystem to return security extended attributes, which by default
|
|
|
|
// don't exist.
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
return names, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Getxattr returns the value associated with the given extended attribute for
|
|
|
|
// the file represented by fd.
|
2020-04-11 02:01:39 +00:00
|
|
|
//
|
|
|
|
// If the size of the return value exceeds opts.Size, ERANGE may be returned
|
|
|
|
// (note that implementations are free to ignore opts.Size entirely and return
|
|
|
|
// without error). In all cases, if opts.Size is 0, the value should be
|
|
|
|
// returned without error, regardless of size.
|
|
|
|
func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) (string, error) {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
2020-04-11 02:01:39 +00:00
|
|
|
val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts)
|
2019-12-30 19:35:06 +00:00
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return val, err
|
|
|
|
}
|
2020-04-11 02:01:39 +00:00
|
|
|
return fd.impl.Getxattr(ctx, *opts)
|
2019-12-18 23:47:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Setxattr changes the value associated with the given extended attribute for
|
|
|
|
// the file represented by fd.
|
2020-04-11 02:01:39 +00:00
|
|
|
func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) error {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
2020-04-11 02:01:39 +00:00
|
|
|
err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
|
2019-12-30 19:35:06 +00:00
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return err
|
|
|
|
}
|
2020-04-11 02:01:39 +00:00
|
|
|
return fd.impl.Setxattr(ctx, *opts)
|
2019-12-18 23:47:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Removexattr removes the given extended attribute from the file represented
|
|
|
|
// by fd.
|
|
|
|
func (fd *FileDescription) Removexattr(ctx context.Context, name string) error {
|
2019-12-30 19:35:06 +00:00
|
|
|
if fd.opts.UseDentryMetadata {
|
|
|
|
vfsObj := fd.vd.mount.vfs
|
|
|
|
rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
|
|
|
|
Root: fd.vd,
|
|
|
|
Start: fd.vd,
|
|
|
|
})
|
|
|
|
err := fd.vd.mount.fs.impl.RemovexattrAt(ctx, rp, name)
|
|
|
|
vfsObj.putResolvingPath(rp)
|
|
|
|
return err
|
|
|
|
}
|
2019-12-18 23:47:24 +00:00
|
|
|
return fd.impl.Removexattr(ctx, name)
|
|
|
|
}
|
|
|
|
|
2019-12-11 02:16:47 +00:00
|
|
|
// SyncFS instructs the filesystem containing fd to execute the semantics of
|
|
|
|
// syncfs(2).
|
|
|
|
func (fd *FileDescription) SyncFS(ctx context.Context) error {
|
|
|
|
return fd.vd.mount.fs.impl.Sync(ctx)
|
|
|
|
}
|
2019-12-12 21:17:47 +00:00
|
|
|
|
|
|
|
// MappedName implements memmap.MappingIdentity.MappedName.
|
|
|
|
func (fd *FileDescription) MappedName(ctx context.Context) string {
|
|
|
|
vfsroot := RootFromContext(ctx)
|
|
|
|
s, _ := fd.vd.mount.vfs.PathnameWithDeleted(ctx, vfsroot, fd.vd)
|
|
|
|
if vfsroot.Ok() {
|
|
|
|
vfsroot.DecRef()
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeviceID implements memmap.MappingIdentity.DeviceID.
|
|
|
|
func (fd *FileDescription) DeviceID() uint64 {
|
2019-12-30 19:35:06 +00:00
|
|
|
stat, err := fd.Stat(context.Background(), StatOptions{
|
2019-12-12 21:17:47 +00:00
|
|
|
// There is no STATX_DEV; we assume that Stat will return it if it's
|
|
|
|
// available regardless of mask.
|
|
|
|
Mask: 0,
|
|
|
|
// fs/proc/task_mmu.c:show_map_vma() just reads inode::i_sb->s_dev
|
|
|
|
// directly.
|
|
|
|
Sync: linux.AT_STATX_DONT_SYNC,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return uint64(linux.MakeDeviceID(uint16(stat.DevMajor), stat.DevMinor))
|
|
|
|
}
|
|
|
|
|
|
|
|
// InodeID implements memmap.MappingIdentity.InodeID.
|
|
|
|
func (fd *FileDescription) InodeID() uint64 {
|
2019-12-30 19:35:06 +00:00
|
|
|
stat, err := fd.Stat(context.Background(), StatOptions{
|
2019-12-12 21:17:47 +00:00
|
|
|
Mask: linux.STATX_INO,
|
|
|
|
// fs/proc/task_mmu.c:show_map_vma() just reads inode::i_ino directly.
|
|
|
|
Sync: linux.AT_STATX_DONT_SYNC,
|
|
|
|
})
|
|
|
|
if err != nil || stat.Mask&linux.STATX_INO == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return stat.Ino
|
|
|
|
}
|
|
|
|
|
|
|
|
// Msync implements memmap.MappingIdentity.Msync.
|
|
|
|
func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) error {
|
2019-12-30 19:35:06 +00:00
|
|
|
return fd.Sync(ctx)
|
2019-12-12 21:17:47 +00:00
|
|
|
}
|
2020-06-10 01:44:57 +00:00
|
|
|
|
|
|
|
// LockBSD tries to acquire a BSD-style advisory file lock.
|
|
|
|
func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error {
|
|
|
|
atomic.StoreUint32(&fd.usedLockBSD, 1)
|
|
|
|
return fd.impl.LockBSD(ctx, fd, lockType, blocker)
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnlockBSD releases a BSD-style advisory file lock.
|
|
|
|
func (fd *FileDescription) UnlockBSD(ctx context.Context) error {
|
|
|
|
return fd.impl.UnlockBSD(ctx, fd)
|
|
|
|
}
|
2020-06-17 17:02:41 +00:00
|
|
|
|
|
|
|
// LockPOSIX locks a POSIX-style file range lock.
|
|
|
|
func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, end uint64, whence int16, block lock.Blocker) error {
|
|
|
|
return fd.impl.LockPOSIX(ctx, uid, t, start, end, whence, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnlockPOSIX unlocks a POSIX-style file range lock.
|
|
|
|
func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, end uint64, whence int16) error {
|
|
|
|
return fd.impl.UnlockPOSIX(ctx, uid, start, end, whence)
|
|
|
|
}
|