gvisor/pkg/sentry/fsimpl/tmpfs/tmpfs.go

770 lines
23 KiB
Go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package tmpfs provides an in-memory filesystem whose contents are
// application-mutable, consistent with Linux's tmpfs.
//
// Lock order:
//
// filesystem.mu
// inode.mu
// regularFileFD.offMu
// *** "memmap.Mappable locks" below this point
// regularFile.mapsMu
// *** "memmap.Mappable locks taken by Translate" below this point
// regularFile.dataMu
// directory.iterMu
package tmpfs
import (
"fmt"
"math"
"strconv"
"strings"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs/memxattr"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
// Name is the default filesystem name. It is the value reported by
// FilesystemType.Name.
const Name = "tmpfs"
// FilesystemType implements vfs.FilesystemType. It carries no state of its
// own; its GetFilesystem method constructs a fresh tmpfs instance on each
// call.
type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
type filesystem struct {
// vfsfs is the vfs-level filesystem state. GetFilesystem initializes it
// with this filesystem as its implementation.
vfsfs vfs.Filesystem
// memFile is used to allocate pages for regular files.
memFile *pgalloc.MemoryFile
// clock is a realtime clock used to set timestamps in file operations.
clock time.Clock
// devMinor is the filesystem's minor device number. devMinor is immutable.
devMinor uint32
// mu serializes changes to the Dentry tree.
mu sync.RWMutex
// nextInoMinusOne is one less than the next inode number to hand out:
// inode.init atomically increments it and uses the result as the new
// inode's number.
nextInoMinusOne uint64 // accessed using atomic memory operations
}
// Name implements vfs.FilesystemType.Name by reporting the package-level
// default filesystem name.
func (FilesystemType) Name() string {
	const fsName = Name
	return fsName
}
// FilesystemOpts is used to pass configuration data to tmpfs. GetFilesystem
// looks for a value of this type in vfs.GetFilesystemOptions.InternalData;
// zero-valued fields leave the corresponding default in place.
type FilesystemOpts struct {
// RootFileType is the FileType of the filesystem root. Valid values
// are: S_IFDIR, S_IFREG, and S_IFLNK. Defaults to S_IFDIR.
RootFileType uint16
// RootSymlinkTarget is the target of the root symlink. Only valid if
// RootFileType == S_IFLNK.
RootSymlinkTarget string
// FilesystemType allows setting a different FilesystemType for this
// tmpfs filesystem. This allows tmpfs to "impersonate" other
// filesystems, like ramdiskfs and cgroupfs.
FilesystemType vfs.FilesystemType
}
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
//
// It builds a new tmpfs instance and returns its vfs.Filesystem together with
// the root dentry. When opts.InternalData holds a FilesystemOpts, it selects
// the root file type, the root symlink target and the filesystem type
// reported to VFS. opts.Data is parsed as generic mount options: only "mode"
// (octal), "uid" and "gid" (decimal) are recognized, and a malformed,
// unmapped or unknown option is logged and rejected with EINVAL.
//
// GetFilesystem panics if ctx does not supply a MemoryFileProvider.
func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, _ string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx)
if memFileProvider == nil {
panic("MemoryFileProviderFromContext returned nil")
}
// Defaults, possibly overridden by FilesystemOpts below.
rootFileType := uint16(linux.S_IFDIR)
newFSType := vfs.FilesystemType(&fstype)
tmpfsOpts, ok := opts.InternalData.(FilesystemOpts)
if ok {
if tmpfsOpts.RootFileType != 0 {
rootFileType = tmpfsOpts.RootFileType
}
if tmpfsOpts.FilesystemType != nil {
newFSType = tmpfsOpts.FilesystemType
}
}
mopts := vfs.GenericParseMountOptions(opts.Data)
// The root defaults to mode 0777, or 01777 when it is a directory.
rootMode := linux.FileMode(0777)
if rootFileType == linux.S_IFDIR {
rootMode = 01777
}
// Each recognized option is removed from mopts as it is consumed so that
// leftovers can be reported as unknown afterwards.
modeStr, ok := mopts["mode"]
if ok {
delete(mopts, "mode")
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid mode: %q", modeStr)
return nil, nil, syserror.EINVAL
}
// Keep only the low 12 mode bits; any file type bits are discarded.
rootMode = linux.FileMode(mode & 07777)
}
// Root ownership defaults to the caller's effective IDs.
rootKUID := creds.EffectiveKUID
uidStr, ok := mopts["uid"]
if ok {
delete(mopts, "uid")
uid, err := strconv.ParseUint(uidStr, 10, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid uid: %q", uidStr)
return nil, nil, syserror.EINVAL
}
// The option value is interpreted in the caller's user namespace.
kuid := creds.UserNamespace.MapToKUID(auth.UID(uid))
if !kuid.Ok() {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped uid: %d", uid)
return nil, nil, syserror.EINVAL
}
rootKUID = kuid
}
rootKGID := creds.EffectiveKGID
gidStr, ok := mopts["gid"]
if ok {
delete(mopts, "gid")
gid, err := strconv.ParseUint(gidStr, 10, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid gid: %q", gidStr)
return nil, nil, syserror.EINVAL
}
// The option value is interpreted in the caller's user namespace.
kgid := creds.UserNamespace.MapToKGID(auth.GID(gid))
if !kgid.Ok() {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped gid: %d", gid)
return nil, nil, syserror.EINVAL
}
rootKGID = kgid
}
// Every recognized key was deleted above, so anything left is unsupported.
if len(mopts) != 0 {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unknown options: %v", mopts)
return nil, nil, syserror.EINVAL
}
// The minor number is handed back in filesystem.Release.
devMinor, err := vfsObj.GetAnonBlockDevMinor()
if err != nil {
return nil, nil, err
}
clock := time.RealtimeClockFromContext(ctx)
fs := filesystem{
memFile: memFileProvider.MemoryFile(),
clock: clock,
devMinor: devMinor,
}
fs.vfsfs.Init(vfsObj, newFSType, &fs)
var root *dentry
switch rootFileType {
case linux.S_IFREG:
root = fs.newDentry(fs.newRegularFile(rootKUID, rootKGID, rootMode))
case linux.S_IFLNK:
root = fs.newDentry(fs.newSymlink(rootKUID, rootKGID, rootMode, tmpfsOpts.RootSymlinkTarget))
case linux.S_IFDIR:
root = &fs.newDirectory(rootKUID, rootKGID, rootMode).dentry
default:
// Drop the filesystem's reference before reporting the error.
fs.vfsfs.DecRef()
return nil, nil, fmt.Errorf("invalid tmpfs root file type: %#o", rootFileType)
}
return &fs.vfsfs, &root.vfsd, nil
}
// NewFilesystem returns a new tmpfs filesystem built with default options: no
// source string, no mount data and no internal configuration.
func NewFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*vfs.Filesystem, *vfs.Dentry, error) {
	var (
		fstype FilesystemType
		opts   vfs.GetFilesystemOptions
	)
	return fstype.GetFilesystem(ctx, vfsObj, creds, "", opts)
}
// Release implements vfs.FilesystemImpl.Release. It hands the filesystem's
// anonymous block device minor number back to the VirtualFilesystem.
func (fs *filesystem) Release() {
	vfsObj := fs.vfsfs.VirtualFilesystem()
	vfsObj.PutAnonBlockDevMinor(fs.devMinor)
}
// dentry implements vfs.DentryImpl.
type dentry struct {
// vfsd is the vfs-level dentry state. newDentry initializes it with this
// dentry as its implementation.
vfsd vfs.Dentry
// parent is this dentry's parent directory. Each referenced dentry holds a
// reference on parent.dentry. If this dentry is a filesystem root, parent
// is nil. parent is protected by filesystem.mu.
parent *dentry
// name is the name of this dentry in its parent. If this dentry is a
// filesystem root, name is the empty string. name is protected by
// filesystem.mu.
name string
// unlinked indicates whether this dentry has been unlinked from its parent.
// It is only set to true on an unlink operation, and never set from true to
// false. unlinked is protected by filesystem.mu.
unlinked bool
// dentryEntry links dentries into their parent directory.childList.
dentryEntry
// inode is the inode represented by this dentry. Multiple Dentries may
// share a single non-directory inode (with hard links). inode is
// immutable.
//
// tmpfs doesn't count references on dentries; because the dentry tree is
// the sole source of truth, it is by definition always consistent with the
// state of the filesystem. However, it does count references on inodes,
// because inode resources are released when all references are dropped.
// dentry therefore forwards reference counting directly to inode.
inode *inode
}
// newDentry allocates a dentry that represents inode and registers it as the
// implementation of its embedded vfs.Dentry. The parent, name and unlinked
// fields are left at their zero values for the caller to fill in.
func (fs *filesystem) newDentry(inode *inode) *dentry {
	d := new(dentry)
	d.inode = inode
	d.vfsd.Init(d)
	return d
}
// IncRef implements vfs.DentryImpl.IncRef. The reference is taken on the
// underlying inode, since tmpfs does not count references on dentries.
func (d *dentry) IncRef() {
	ino := d.inode
	ino.incRef()
}
// TryIncRef implements vfs.DentryImpl.TryIncRef. It reports whether a
// reference could be taken on the underlying inode.
func (d *dentry) TryIncRef() bool {
	ino := d.inode
	return ino.tryIncRef()
}
// DecRef implements vfs.DentryImpl.DecRef. The reference is dropped on the
// underlying inode.
func (d *dentry) DecRef() {
	ino := d.inode
	ino.decRef()
}
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. It delivers
// the event to the parent directory's watches (if there is a parent) and then
// to the watches on d's own inode, tagging the event with IN_ISDIR when d is
// a directory.
func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {
	ino := d.inode
	if ino.isDir() {
		events = events | linux.IN_ISDIR
	}

	// Parent first, then self: Linux always notifies the parent first.
	//
	// d.parent and d.name may be stale if a rename is running concurrently;
	// inotify does not provide consistency guarantees, so that is acceptable.
	if d.parent != nil {
		parentWatches := &d.parent.inode.watches
		parentWatches.NotifyWithExclusions(d.name, events, cookie, et, d.unlinked)
	}
	ino.watches.Notify("", events, cookie, et)
}
// Watches implements vfs.DentryImpl.Watches. The watch set lives on the
// inode, so every dentry of the same inode returns the same set.
func (d *dentry) Watches() *vfs.Watches {
	ino := d.inode
	return &ino.watches
}
// inode represents a filesystem object.
type inode struct {
// fs is the owning filesystem. fs is immutable.
fs *filesystem
// refs is a reference count. refs is accessed using atomic memory
// operations.
//
// A reference is held on all inodes as long as they are reachable in the
// filesystem tree, i.e. nlink is nonzero. This reference is dropped when
// nlink reaches 0.
refs int64
// xattrs implements extended attributes.
//
// TODO(b/148380782): Support xattrs other than user.*
xattrs memxattr.SimpleExtendedAttributes
// Inode metadata. Writing multiple fields atomically requires holding
// mu, otherwise atomic operations can be used.
mu sync.Mutex
mode uint32 // file type and mode
nlink uint32 // protected by filesystem.mu instead of inode.mu
uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
gid uint32 // auth.KGID, but stored as raw uint32 for sync/atomic
ino uint64 // immutable
// Linux's tmpfs has no concept of btime.
atime int64 // nanoseconds
ctime int64 // nanoseconds
mtime int64 // nanoseconds
// locks holds the file locks for this inode.
locks lock.FileLocks
// Inotify watches for this inode.
watches vfs.Watches
// impl is the type-specific part of the inode (e.g. *regularFile,
// *directory, *symlink); code in this file type-switches on it.
impl interface{} // immutable
}
// maxLinks is the largest link count an inode can hold; incLinksLocked panics
// rather than increment past it.
const maxLinks = math.MaxUint32
// init fills in the common inode fields: owning filesystem, type-specific
// implementation, ownership, mode, a freshly allocated inode number and
// timestamps. The inode starts with a single reference. i.nlink is
// deliberately left untouched; the caller must initialize it.
//
// init panics if mode carries no file type bits.
func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth.KGID, mode linux.FileMode) {
	if mode.FileType() == 0 {
		panic("file type is required in FileMode")
	}

	i.fs, i.impl = fs, impl
	i.refs = 1
	i.mode, i.uid, i.gid = uint32(mode), uint32(kuid), uint32(kgid)
	i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)

	// A newly created tmpfs inode has atime, ctime and mtime all equal to
	// the creation time.
	created := fs.clock.Now().Nanoseconds()
	i.atime, i.ctime, i.mtime = created, created, created

	i.watches = vfs.Watches{}
}
// incLinksLocked adds one to i's link count.
//
// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
// i.nlink < maxLinks. Violating either bound on i.nlink panics.
func (i *inode) incLinksLocked() {
	switch i.nlink {
	case 0:
		panic("tmpfs.inode.incLinksLocked() called with no existing links")
	case maxLinks:
		panic("tmpfs.inode.incLinksLocked() called with maximum link count")
	}
	atomic.AddUint32(&i.nlink, 1)
}
// decLinksLocked subtracts one from i's link count. When the count drops to
// 0, the reference held on behalf of the filesystem tree is released too.
//
// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
func (i *inode) decLinksLocked() {
	if i.nlink == 0 {
		panic("tmpfs.inode.decLinksLocked() called with no existing links")
	}
	// Adding ^uint32(0) is an atomic decrement by one.
	remaining := atomic.AddUint32(&i.nlink, ^uint32(0))
	if remaining != 0 {
		return
	}
	i.decRef()
}
// incRef takes an additional reference on i. The caller must already hold
// one: if the count was not positive before the increment, incRef panics.
func (i *inode) incRef() {
	refs := atomic.AddInt64(&i.refs, 1)
	if refs <= 1 {
		panic("tmpfs.inode.incRef() called without holding a reference")
	}
}
// tryIncRef takes a reference on i unless its reference count has already
// dropped to zero, and reports whether it succeeded. The compare-and-swap is
// retried until it either wins or observes a zero count.
func (i *inode) tryIncRef() bool {
	for {
		cur := atomic.LoadInt64(&i.refs)
		switch {
		case cur == 0:
			return false
		case atomic.CompareAndSwapInt64(&i.refs, cur, cur+1):
			return true
		}
	}
}
// decRef drops one reference on i. When the last reference goes away, the
// inode's watches are told about the deletion and, for regular files, the
// memory backing the file data is released. decRef panics if the count goes
// negative.
func (i *inode) decRef() {
	refs := atomic.AddInt64(&i.refs, -1)
	if refs < 0 {
		panic("tmpfs.inode.decRef() called without holding a reference")
	}
	if refs != 0 {
		return
	}

	i.watches.HandleDeletion()
	// The file can no longer be used, so its data can be dropped without
	// taking any locks or updating any metadata.
	if regFile, ok := i.impl.(*regularFile); ok {
		regFile.data.DropAll(regFile.memFile)
	}
}
// checkPermissions checks whether creds may access i in the ways described by
// ats, using a snapshot of the inode's current mode and ownership. It returns
// whatever vfs.GenericCheckPermissions returns.
func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
	mode := linux.FileMode(atomic.LoadUint32(&i.mode))
	kuid := auth.KUID(atomic.LoadUint32(&i.uid))
	kgid := auth.KGID(atomic.LoadUint32(&i.gid))
	return vfs.GenericCheckPermissions(creds, ats, mode, kuid, kgid)
}
// statTo fills stat with i's current attributes.
//
// Go won't inline this function, and returning linux.Statx (which is quite
// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
// output parameter.
//
// Note that Linux does not guarantee to return consistent data (in the case of
// a concurrent modification), so we do not require holding inode.mu. Because
// the lock is not held, every mutable field is read with an atomic load to
// pair with the atomic stores performed by writers.
//
// statTo panics if i.impl is not one of the known inode types.
func (i *inode) statTo(stat *linux.Statx) {
	stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK |
		linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE |
		linux.STATX_BLOCKS | linux.STATX_ATIME | linux.STATX_CTIME |
		linux.STATX_MTIME
	stat.Blksize = usermem.PageSize
	stat.Nlink = atomic.LoadUint32(&i.nlink)
	stat.UID = atomic.LoadUint32(&i.uid)
	stat.GID = atomic.LoadUint32(&i.gid)
	stat.Mode = uint16(atomic.LoadUint32(&i.mode))
	stat.Ino = i.ino
	// Timestamps are written with atomic.StoreInt64 (setStat, touch*), so
	// they must be loaded atomically here to avoid a data race.
	stat.Atime = linux.NsecToStatxTimestamp(atomic.LoadInt64(&i.atime))
	stat.Ctime = linux.NsecToStatxTimestamp(atomic.LoadInt64(&i.ctime))
	stat.Mtime = linux.NsecToStatxTimestamp(atomic.LoadInt64(&i.mtime))
	stat.DevMajor = linux.UNNAMED_MAJOR
	stat.DevMinor = i.fs.devMinor
	switch impl := i.impl.(type) {
	case *regularFile:
		stat.Size = atomic.LoadUint64(&impl.size)
		// TODO(jamieliu): This should be impl.data.Span() / 512, but this is
		// too expensive to compute here. Cache it in regularFile.
		stat.Blocks = allocatedBlocksForSize(stat.Size)
	case *directory:
		// "20" is mm/shmem.c:BOGO_DIRENT_SIZE.
		stat.Size = 20 * (2 + uint64(atomic.LoadInt64(&impl.numChildren)))
		// stat.Blocks is 0.
	case *symlink:
		stat.Size = uint64(len(impl.target))
		// stat.Blocks is 0.
	case *namedPipe, *socketFile:
		// stat.Size and stat.Blocks are 0.
	case *deviceFile:
		// stat.Size and stat.Blocks are 0.
		stat.RdevMajor = impl.major
		stat.RdevMinor = impl.minor
	default:
		panic(fmt.Sprintf("unknown inode type: %T", i.impl))
	}
}
// setStat applies the attribute changes selected by stat.Mask to i.
//
// An empty mask is a no-op. Only mode, uid, gid, size and the three
// timestamps may be set; any other mask bit yields EPERM. Permission for the
// change is checked with vfs.CheckSetStat before i.mu is taken. A size change
// is only valid for regular files (EISDIR for directories, EINVAL for every
// other type). ctime is bumped to the current time by any change, and mtime
// by a truncation that actually updated the file, unless the same call sets
// that timestamp explicitly.
func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx) error {
if stat.Mask == 0 {
return nil
}
if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE) != 0 {
return syserror.EPERM
}
mode := linux.FileMode(atomic.LoadUint32(&i.mode))
if err := vfs.CheckSetStat(ctx, creds, stat, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
return err
}
// Hold i.mu so that the fields below are updated as one unit; individual
// stores stay atomic because readers such as statTo do not take the lock.
i.mu.Lock()
defer i.mu.Unlock()
var (
needsMtimeBump bool
needsCtimeBump bool
)
mask := stat.Mask
if mask&linux.STATX_MODE != 0 {
// Preserve the file type bits; only the remaining mode bits change.
ft := atomic.LoadUint32(&i.mode) & linux.S_IFMT
atomic.StoreUint32(&i.mode, ft|uint32(stat.Mode&^linux.S_IFMT))
needsCtimeBump = true
}
if mask&linux.STATX_UID != 0 {
atomic.StoreUint32(&i.uid, stat.UID)
needsCtimeBump = true
}
if mask&linux.STATX_GID != 0 {
atomic.StoreUint32(&i.gid, stat.GID)
needsCtimeBump = true
}
if mask&linux.STATX_SIZE != 0 {
switch impl := i.impl.(type) {
case *regularFile:
updated, err := impl.truncateLocked(stat.Size)
if err != nil {
return err
}
if updated {
needsMtimeBump = true
needsCtimeBump = true
}
case *directory:
return syserror.EISDIR
default:
return syserror.EINVAL
}
}
// A single timestamp is used for UTIME_NOW and for every implicit bump.
now := i.fs.clock.Now().Nanoseconds()
if mask&linux.STATX_ATIME != 0 {
if stat.Atime.Nsec == linux.UTIME_NOW {
atomic.StoreInt64(&i.atime, now)
} else {
atomic.StoreInt64(&i.atime, stat.Atime.ToNsecCapped())
}
needsCtimeBump = true
}
if mask&linux.STATX_MTIME != 0 {
if stat.Mtime.Nsec == linux.UTIME_NOW {
atomic.StoreInt64(&i.mtime, now)
} else {
atomic.StoreInt64(&i.mtime, stat.Mtime.ToNsecCapped())
}
needsCtimeBump = true
// Ignore the mtime bump, since we just set it ourselves.
needsMtimeBump = false
}
if mask&linux.STATX_CTIME != 0 {
if stat.Ctime.Nsec == linux.UTIME_NOW {
atomic.StoreInt64(&i.ctime, now)
} else {
atomic.StoreInt64(&i.ctime, stat.Ctime.ToNsecCapped())
}
// Ignore the ctime bump, since we just set it ourselves.
needsCtimeBump = false
}
if needsMtimeBump {
atomic.StoreInt64(&i.mtime, now)
}
if needsCtimeBump {
atomic.StoreInt64(&i.ctime, now)
}
return nil
}
// allocatedBlocksForSize returns the number of 512B blocks needed to
// accommodate the given size in bytes, as appropriate for struct
// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
// size is independent of the "preferred block size for I/O", struct
// stat::st_blksize and struct statx::stx_blksize.)
//
// The result is size/512 rounded up. It is computed without adding to size
// first, so it stays correct for sizes within 511 bytes of math.MaxUint64,
// where "size + 511" would wrap around.
func allocatedBlocksForSize(size uint64) uint64 {
	const blockSize = 512
	blocks := size / blockSize
	if size%blockSize != 0 {
		blocks++
	}
	return blocks
}
// direntType returns the linux.DT_* directory entry type that corresponds to
// i's implementation type, as reported when listing the containing directory.
//
// Every inode type that statTo accepts is handled here, including named
// pipes (DT_FIFO). direntType panics on an unknown inode type or an unknown
// device kind.
func (i *inode) direntType() uint8 {
	switch impl := i.impl.(type) {
	case *regularFile:
		return linux.DT_REG
	case *directory:
		return linux.DT_DIR
	case *symlink:
		return linux.DT_LNK
	case *namedPipe:
		// Without this case, listing a directory containing a FIFO would hit
		// the panic below.
		return linux.DT_FIFO
	case *socketFile:
		return linux.DT_SOCK
	case *deviceFile:
		switch impl.kind {
		case vfs.BlockDevice:
			return linux.DT_BLK
		case vfs.CharDevice:
			return linux.DT_CHR
		default:
			panic(fmt.Sprintf("unknown vfs.DeviceKind: %v", impl.kind))
		}
	default:
		panic(fmt.Sprintf("unknown inode type: %T", i.impl))
	}
}
// isDir reports whether i is a directory, judged by the file type bits of
// its mode.
//
// i.mode is loaded atomically: setStat rewrites it with atomic.StoreUint32,
// and isDir is called without holding inode.mu (e.g. from
// dentry.InotifyWithParent), so a plain read would be a data race.
func (i *inode) isDir() bool {
	mode := linux.FileMode(atomic.LoadUint32(&i.mode))
	return mode.FileType() == linux.S_IFDIR
}
// touchAtime sets i's access time to the current time. It does nothing when
// the mount has NoATime set or when write access to the mount cannot be
// started; that failure is deliberately not reported.
func (i *inode) touchAtime(mnt *vfs.Mount) {
	// Short-circuit evaluation keeps CheckBeginWrite from being called at
	// all on NoATime mounts.
	if mnt.Flags.NoATime || mnt.CheckBeginWrite() != nil {
		return
	}
	ts := i.fs.clock.Now().Nanoseconds()
	i.mu.Lock()
	atomic.StoreInt64(&i.atime, ts)
	i.mu.Unlock()
	mnt.EndWrite()
}
// touchCtime sets i's status change time to the current time.
//
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite().
func (i *inode) touchCtime() {
	ts := i.fs.clock.Now().Nanoseconds()
	i.mu.Lock()
	defer i.mu.Unlock()
	atomic.StoreInt64(&i.ctime, ts)
}
// touchCMtime sets both i's modification time and its status change time to
// the same current timestamp, taking inode.mu itself.
//
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite().
func (i *inode) touchCMtime() {
	ts := i.fs.clock.Now().Nanoseconds()
	i.mu.Lock()
	defer i.mu.Unlock()
	atomic.StoreInt64(&i.mtime, ts)
	atomic.StoreInt64(&i.ctime, ts)
}
// touchCMtimeLocked sets both i's modification time and its status change
// time to the same current timestamp, for callers that already hold the
// inode lock.
//
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite() and holds
// inode.mu.
func (i *inode) touchCMtimeLocked() {
	ts := i.fs.clock.Now().Nanoseconds()
	for _, field := range [...]*int64{&i.mtime, &i.ctime} {
		atomic.StoreInt64(field, ts)
	}
}
// listxattr returns the names of i's extended attributes, forwarding size to
// the underlying attribute store. No permission check is made here.
func (i *inode) listxattr(size uint64) ([]string, error) {
	names, err := i.xattrs.Listxattr(size)
	return names, err
}
// getxattr returns the value of the extended attribute named by opts.
//
// The caller needs read permission on i. Names outside the user.* namespace
// fail with EOPNOTSUPP, and inode types that do not support user.* attributes
// report ENODATA.
func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) {
	if err := i.checkPermissions(creds, vfs.MayRead); err != nil {
		return "", err
	}
	switch {
	case !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX):
		return "", syserror.EOPNOTSUPP
	case !i.userXattrSupported():
		return "", syserror.ENODATA
	}
	return i.xattrs.Getxattr(opts)
}
// setxattr sets the extended attribute described by opts.
//
// The caller needs write permission on i. Names outside the user.* namespace
// fail with EOPNOTSUPP, and inode types that do not support user.* attributes
// fail with EPERM.
func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error {
	if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
		return err
	}
	switch {
	case !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX):
		return syserror.EOPNOTSUPP
	case !i.userXattrSupported():
		return syserror.EPERM
	}
	return i.xattrs.Setxattr(opts)
}
// removexattr deletes the extended attribute called name.
//
// The caller needs write permission on i. Names outside the user.* namespace
// fail with EOPNOTSUPP, and inode types that do not support user.* attributes
// fail with EPERM.
func (i *inode) removexattr(creds *auth.Credentials, name string) error {
	if err := i.checkPermissions(creds, vfs.MayWrite); err != nil {
		return err
	}
	switch {
	case !strings.HasPrefix(name, linux.XATTR_USER_PREFIX):
		return syserror.EOPNOTSUPP
	case !i.userXattrSupported():
		return syserror.EPERM
	}
	return i.xattrs.Removexattr(name)
}
// userXattrSupported reports whether i may carry extended attributes in the
// user.* namespace, which is the case only for regular files and
// directories.
func (i *inode) userXattrSupported() bool {
	switch atomic.LoadUint32(&i.mode) & linux.S_IFMT {
	case linux.S_IFREG, linux.S_IFDIR:
		return true
	default:
		return false
	}
}
// fileDescription is embedded by tmpfs implementations of
// vfs.FileDescriptionImpl. It supplies the stat and extended attribute
// methods shared by those implementations, all of which operate on the inode
// behind the file description's dentry.
type fileDescription struct {
// vfsfd is the vfs-level file description state; the filesystem(),
// dentry() and inode() helpers navigate from it.
vfsfd vfs.FileDescription
// Embedded vfs helpers. NOTE(review): presumably these provide default
// method implementations and file-lock support; confirm against package
// vfs.
vfs.FileDescriptionDefaultImpl
vfs.LockFD
}
// filesystem returns the tmpfs filesystem behind fd's mount. It panics if the
// mount's filesystem implementation is not a *filesystem.
func (fd *fileDescription) filesystem() *filesystem {
	impl := fd.vfsfd.Mount().Filesystem().Impl()
	return impl.(*filesystem)
}
// dentry returns the tmpfs dentry that fd was opened on. It panics if the
// dentry implementation is not a *dentry.
func (fd *fileDescription) dentry() *dentry {
	impl := fd.vfsfd.Dentry().Impl()
	return impl.(*dentry)
}
// inode returns the inode represented by fd's dentry.
func (fd *fileDescription) inode() *inode {
	d := fd.dentry()
	return d.inode
}
// Stat implements vfs.FileDescriptionImpl.Stat. It always reports the full
// set of attributes produced by inode.statTo; opts is not consulted, and the
// returned error is always nil.
func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
	var result linux.Statx
	ino := fd.inode()
	ino.statTo(&result)
	return result, nil
}
// SetStat implements vfs.FileDescriptionImpl.SetStat. It applies opts.Stat to
// the inode using the credentials found in ctx and, on success, generates the
// inotify event (if any) that corresponds to the changed attributes.
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
	creds := auth.CredentialsFromContext(ctx)
	d := fd.dentry()
	err := d.inode.setStat(ctx, creds, &opts.Stat)
	if err != nil {
		return err
	}

	ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask)
	if ev == 0 {
		return nil
	}
	d.InotifyWithParent(ev, 0, vfs.InodeEvent)
	return nil
}
// Listxattr implements vfs.FileDescriptionImpl.Listxattr by listing the
// extended attributes of the underlying inode.
func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
	ino := fd.inode()
	return ino.listxattr(size)
}
// Getxattr implements vfs.FileDescriptionImpl.Getxattr. The lookup is
// performed on the underlying inode with the credentials found in ctx.
func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) {
	ino := fd.inode()
	creds := auth.CredentialsFromContext(ctx)
	return ino.getxattr(creds, &opts)
}
// Setxattr implements vfs.FileDescriptionImpl.Setxattr. After the attribute
// has been stored on the inode, an IN_ATTRIB inotify event is generated for
// the dentry and its parent.
func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
	d := fd.dentry()
	creds := auth.CredentialsFromContext(ctx)
	err := d.inode.setxattr(creds, &opts)
	if err == nil {
		// Generate inotify events.
		d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
	}
	return err
}
// Removexattr implements vfs.FileDescriptionImpl.Removexattr. After the
// attribute has been removed from the inode, an IN_ATTRIB inotify event is
// generated for the dentry and its parent.
func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
	d := fd.dentry()
	creds := auth.CredentialsFromContext(ctx)
	err := d.inode.removexattr(creds, name)
	if err == nil {
		// Generate inotify events.
		d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent)
	}
	return err
}
// NewMemfd creates a new tmpfs regular file and file description that can back
// an anonymous fd created by memfd_create.
//
// The file is owned by creds' effective user and group, and its dentry is
// given the supplied name. When allowSeals is true the file's seal set is
// cleared. NewMemfd panics if mount is not a tmpfs mount, and returns an
// error if the file description cannot be initialized.
func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name string) (*vfs.FileDescription, error) {
	fs, isTmpfs := mount.Filesystem().Impl().(*filesystem)
	if !isTmpfs {
		panic("NewMemfd() called with non-tmpfs mount")
	}

	// Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with
	// S_IRWXUGO.
	memfdInode := fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, 0777)
	regFile := memfdInode.impl.(*regularFile)
	if allowSeals {
		regFile.seals = 0
	}

	d := fs.newDentry(memfdInode)
	// Drop the local dentry reference when NewMemfd returns, on both the
	// success and the failure path.
	defer d.DecRef()
	d.name = name

	// Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with
	// FMODE_READ | FMODE_WRITE.
	fd := new(regularFileFD)
	err := fd.vfsfd.Init(fd, uint32(linux.O_RDWR), mount, &d.vfsd, &vfs.FileDescriptionOptions{})
	if err != nil {
		return nil, err
	}
	return &fd.vfsfd, nil
}