2019-07-18 22:09:14 +00:00
|
|
|
// Copyright 2019 The gVisor Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package memfs provides a filesystem implementation that behaves like tmpfs:
|
|
|
|
// the Dentry tree is the sole source of truth for the state of the filesystem.
|
|
|
|
//
|
|
|
|
// memfs is intended primarily to demonstrate filesystem implementation
|
|
|
|
// patterns. Real uses cases for an in-memory filesystem should use tmpfs
|
|
|
|
// instead.
|
|
|
|
//
|
|
|
|
// Lock order:
|
|
|
|
//
|
2019-08-08 18:45:33 +00:00
|
|
|
// filesystem.mu
|
2019-07-18 22:09:14 +00:00
|
|
|
// regularFileFD.offMu
|
|
|
|
// regularFile.mu
|
2019-08-08 18:45:33 +00:00
|
|
|
// inode.mu
|
2019-07-18 22:09:14 +00:00
|
|
|
package memfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"sync/atomic"
|
|
|
|
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
|
|
)
|
|
|
|
|
|
|
|
// FilesystemType implements vfs.FilesystemType.
|
|
|
|
type FilesystemType struct{}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// filesystem implements vfs.FilesystemImpl.
|
|
|
|
type filesystem struct {
|
2019-07-18 22:09:14 +00:00
|
|
|
vfsfs vfs.Filesystem
|
|
|
|
|
|
|
|
// mu serializes changes to the Dentry tree.
|
|
|
|
mu sync.RWMutex
|
|
|
|
|
|
|
|
nextInoMinusOne uint64 // accessed using atomic memory operations
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFilesystem implements vfs.FilesystemType.NewFilesystem.
|
|
|
|
func (fstype FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
|
2019-08-08 18:45:33 +00:00
|
|
|
var fs filesystem
|
2019-07-18 22:09:14 +00:00
|
|
|
fs.vfsfs.Init(&fs)
|
|
|
|
root := fs.newDentry(fs.newDirectory(creds, 01777))
|
|
|
|
return &fs.vfsfs, &root.vfsd, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release implements vfs.FilesystemImpl.Release.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (fs *filesystem) Release() {
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sync implements vfs.FilesystemImpl.Sync.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (fs *filesystem) Sync(ctx context.Context) error {
|
2019-07-18 22:09:14 +00:00
|
|
|
// All filesystem state is in-memory.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// dentry implements vfs.DentryImpl.
|
|
|
|
type dentry struct {
|
2019-07-18 22:09:14 +00:00
|
|
|
vfsd vfs.Dentry
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// inode is the inode represented by this dentry. Multiple Dentries may
|
|
|
|
// share a single non-directory inode (with hard links). inode is
|
2019-07-18 22:09:14 +00:00
|
|
|
// immutable.
|
2019-08-08 18:45:33 +00:00
|
|
|
inode *inode
|
2019-07-18 22:09:14 +00:00
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// memfs doesn't count references on dentries; because the dentry tree is
|
2019-07-18 22:09:14 +00:00
|
|
|
// the sole source of truth, it is by definition always consistent with the
|
2019-08-08 18:45:33 +00:00
|
|
|
// state of the filesystem. However, it does count references on inodes,
|
|
|
|
// because inode resources are released when all references are dropped.
|
2019-07-18 22:09:14 +00:00
|
|
|
// (memfs doesn't really have resources to release, but we implement
|
|
|
|
// reference counting because tmpfs regular files will.)
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// dentryEntry (ugh) links dentries into their parent directory.childList.
|
2019-07-18 22:09:14 +00:00
|
|
|
dentryEntry
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (fs *filesystem) newDentry(inode *inode) *dentry {
|
|
|
|
d := &dentry{
|
2019-07-18 22:09:14 +00:00
|
|
|
inode: inode,
|
|
|
|
}
|
|
|
|
d.vfsd.Init(d)
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
|
|
|
// IncRef implements vfs.DentryImpl.IncRef.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (d *dentry) IncRef(vfsfs *vfs.Filesystem) {
|
2019-07-18 22:09:14 +00:00
|
|
|
d.inode.incRef()
|
|
|
|
}
|
|
|
|
|
|
|
|
// TryIncRef implements vfs.DentryImpl.TryIncRef.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool {
|
2019-07-18 22:09:14 +00:00
|
|
|
return d.inode.tryIncRef()
|
|
|
|
}
|
|
|
|
|
|
|
|
// DecRef implements vfs.DentryImpl.DecRef.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (d *dentry) DecRef(vfsfs *vfs.Filesystem) {
|
2019-07-18 22:09:14 +00:00
|
|
|
d.inode.decRef()
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// inode represents a filesystem object.
|
|
|
|
type inode struct {
|
2019-07-18 22:09:14 +00:00
|
|
|
// refs is a reference count. refs is accessed using atomic memory
|
|
|
|
// operations.
|
|
|
|
//
|
2019-08-08 18:45:33 +00:00
|
|
|
// A reference is held on all inodes that are reachable in the filesystem
|
2019-07-18 22:09:14 +00:00
|
|
|
// tree. For non-directories (which may have multiple hard links), this
|
|
|
|
// means that a reference is dropped when nlink reaches 0. For directories,
|
|
|
|
// nlink never reaches 0 due to the "." entry; instead,
|
2019-08-08 18:45:33 +00:00
|
|
|
// filesystem.RmdirAt() drops the reference.
|
2019-07-18 22:09:14 +00:00
|
|
|
refs int64
|
|
|
|
|
|
|
|
// Inode metadata; protected by mu and accessed using atomic memory
|
|
|
|
// operations unless otherwise specified.
|
|
|
|
mu sync.RWMutex
|
|
|
|
mode uint32 // excluding file type bits, which are based on impl
|
2019-08-08 18:45:33 +00:00
|
|
|
nlink uint32 // protected by filesystem.mu instead of inode.mu
|
2019-07-18 22:09:14 +00:00
|
|
|
uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
|
|
|
|
gid uint32 // auth.KGID, but ...
|
|
|
|
ino uint64 // immutable
|
|
|
|
|
|
|
|
impl interface{} // immutable
|
|
|
|
}
|
|
|
|
|
2019-10-04 21:19:08 +00:00
|
|
|
func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
|
2019-07-18 22:09:14 +00:00
|
|
|
i.refs = 1
|
|
|
|
i.mode = uint32(mode)
|
|
|
|
i.uid = uint32(creds.EffectiveKUID)
|
|
|
|
i.gid = uint32(creds.EffectiveKGID)
|
|
|
|
i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
|
|
|
|
// i.nlink initialized by caller
|
|
|
|
i.impl = impl
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// Preconditions: filesystem.mu must be locked for writing.
|
|
|
|
func (i *inode) incLinksLocked() {
|
2019-07-18 22:09:14 +00:00
|
|
|
if atomic.AddUint32(&i.nlink, 1) <= 1 {
|
2019-08-08 18:45:33 +00:00
|
|
|
panic("memfs.inode.incLinksLocked() called with no existing links")
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
// Preconditions: filesystem.mu must be locked for writing.
|
|
|
|
func (i *inode) decLinksLocked() {
|
2019-07-18 22:09:14 +00:00
|
|
|
if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 {
|
|
|
|
i.decRef()
|
|
|
|
} else if nlink == ^uint32(0) { // negative overflow
|
2019-08-08 18:45:33 +00:00
|
|
|
panic("memfs.inode.decLinksLocked() called with no existing links")
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) incRef() {
|
2019-07-18 22:09:14 +00:00
|
|
|
if atomic.AddInt64(&i.refs, 1) <= 1 {
|
2019-08-08 18:45:33 +00:00
|
|
|
panic("memfs.inode.incRef() called without holding a reference")
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) tryIncRef() bool {
|
2019-07-18 22:09:14 +00:00
|
|
|
for {
|
|
|
|
refs := atomic.LoadInt64(&i.refs)
|
|
|
|
if refs == 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) decRef() {
|
2019-07-18 22:09:14 +00:00
|
|
|
if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
|
|
|
|
// This is unnecessary; it's mostly to simulate what tmpfs would do.
|
|
|
|
if regfile, ok := i.impl.(*regularFile); ok {
|
|
|
|
regfile.mu.Lock()
|
|
|
|
regfile.data = nil
|
|
|
|
atomic.StoreInt64(®file.dataLen, 0)
|
|
|
|
regfile.mu.Unlock()
|
|
|
|
}
|
|
|
|
} else if refs < 0 {
|
2019-08-08 18:45:33 +00:00
|
|
|
panic("memfs.inode.decRef() called without holding a reference")
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
|
2019-07-18 22:09:14 +00:00
|
|
|
return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid)))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Go won't inline this function, and returning linux.Statx (which is quite
|
|
|
|
// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
|
|
|
|
// output parameter.
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) statTo(stat *linux.Statx) {
|
2019-07-18 22:09:14 +00:00
|
|
|
stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
|
|
|
|
stat.Blksize = 1 // usermem.PageSize in tmpfs
|
|
|
|
stat.Nlink = atomic.LoadUint32(&i.nlink)
|
|
|
|
stat.UID = atomic.LoadUint32(&i.uid)
|
|
|
|
stat.GID = atomic.LoadUint32(&i.gid)
|
|
|
|
stat.Mode = uint16(atomic.LoadUint32(&i.mode))
|
|
|
|
stat.Ino = i.ino
|
|
|
|
// TODO: device number
|
|
|
|
switch impl := i.impl.(type) {
|
|
|
|
case *regularFile:
|
|
|
|
stat.Mode |= linux.S_IFREG
|
|
|
|
stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
|
|
|
|
stat.Size = uint64(atomic.LoadInt64(&impl.dataLen))
|
|
|
|
// In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in
|
|
|
|
// a uint64 accessed using atomic memory operations to avoid taking
|
|
|
|
// locks).
|
|
|
|
stat.Blocks = allocatedBlocksForSize(stat.Size)
|
|
|
|
case *directory:
|
|
|
|
stat.Mode |= linux.S_IFDIR
|
|
|
|
case *symlink:
|
|
|
|
stat.Mode |= linux.S_IFLNK
|
|
|
|
stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
|
|
|
|
stat.Size = uint64(len(impl.target))
|
|
|
|
stat.Blocks = allocatedBlocksForSize(stat.Size)
|
2019-10-19 18:48:09 +00:00
|
|
|
case *namedPipe:
|
|
|
|
stat.Mode |= linux.S_IFIFO
|
2019-07-18 22:09:14 +00:00
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("unknown inode type: %T", i.impl))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// allocatedBlocksForSize returns the number of 512B blocks needed to
|
|
|
|
// accommodate the given size in bytes, as appropriate for struct
|
|
|
|
// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
|
|
|
|
// size is independent of the "preferred block size for I/O", struct
|
|
|
|
// stat::st_blksize and struct statx::stx_blksize.)
|
|
|
|
func allocatedBlocksForSize(size uint64) uint64 {
|
|
|
|
return (size + 511) / 512
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (i *inode) direntType() uint8 {
|
2019-07-18 22:09:14 +00:00
|
|
|
switch i.impl.(type) {
|
|
|
|
case *regularFile:
|
|
|
|
return linux.DT_REG
|
|
|
|
case *directory:
|
|
|
|
return linux.DT_DIR
|
|
|
|
case *symlink:
|
|
|
|
return linux.DT_LNK
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("unknown inode type: %T", i.impl))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// fileDescription is embedded by memfs implementations of
|
|
|
|
// vfs.FileDescriptionImpl.
|
|
|
|
type fileDescription struct {
|
|
|
|
vfsfd vfs.FileDescription
|
2019-08-16 17:18:58 +00:00
|
|
|
vfs.FileDescriptionDefaultImpl
|
2019-07-18 22:09:14 +00:00
|
|
|
|
|
|
|
flags uint32 // status flags; immutable
|
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (fd *fileDescription) filesystem() *filesystem {
|
|
|
|
return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem)
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
|
2019-08-08 18:45:33 +00:00
|
|
|
func (fd *fileDescription) inode() *inode {
|
|
|
|
return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode
|
2019-07-18 22:09:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// StatusFlags implements vfs.FileDescriptionImpl.StatusFlags.
|
|
|
|
func (fd *fileDescription) StatusFlags(ctx context.Context) (uint32, error) {
|
|
|
|
return fd.flags, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetStatusFlags implements vfs.FileDescriptionImpl.SetStatusFlags.
|
|
|
|
func (fd *fileDescription) SetStatusFlags(ctx context.Context, flags uint32) error {
|
|
|
|
// None of the flags settable by fcntl(F_SETFL) are supported, so this is a
|
|
|
|
// no-op.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stat implements vfs.FileDescriptionImpl.Stat.
|
|
|
|
func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
|
|
|
|
var stat linux.Statx
|
|
|
|
fd.inode().statTo(&stat)
|
|
|
|
return stat, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetStat implements vfs.FileDescriptionImpl.SetStat.
|
|
|
|
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
|
|
|
|
if opts.Stat.Mask == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2019-08-08 18:45:33 +00:00
|
|
|
// TODO: implement inode.setStat
|
2019-07-18 22:09:14 +00:00
|
|
|
return syserror.EPERM
|
|
|
|
}
|