1506 lines
44 KiB
Go
1506 lines
44 KiB
Go
// Copyright 2020 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package overlay
|
|
|
|
import (
|
|
"strings"
|
|
"sync/atomic"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
"gvisor.dev/gvisor/pkg/context"
|
|
"gvisor.dev/gvisor/pkg/fspath"
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
|
"gvisor.dev/gvisor/pkg/sync"
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
)
|
|
|
|
// _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs
|
|
// attributes.
|
|
// Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_PREFIX
|
|
const _OVL_XATTR_PREFIX = linux.XATTR_TRUSTED_PREFIX + "overlay."
|
|
|
|
// _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for
|
|
// opaque directories.
|
|
// Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE
|
|
const _OVL_XATTR_OPAQUE = _OVL_XATTR_PREFIX + "opaque"
|
|
|
|
func isWhiteout(stat *linux.Statx) bool {
|
|
return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0
|
|
}
|
|
|
|
// Sync implements vfs.FilesystemImpl.Sync.
|
|
func (fs *filesystem) Sync(ctx context.Context) error {
|
|
if fs.opts.UpperRoot.Ok() {
|
|
return fs.opts.UpperRoot.Mount().Filesystem().Impl().Sync(ctx)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var dentrySlicePool = sync.Pool{
|
|
New: func() interface{} {
|
|
ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
|
|
return &ds
|
|
},
|
|
}
|
|
|
|
func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
|
|
if ds == nil {
|
|
ds = dentrySlicePool.Get().(*[]*dentry)
|
|
}
|
|
*ds = append(*ds, d)
|
|
return ds
|
|
}
|
|
|
|
// Preconditions: ds != nil.
|
|
func putDentrySlice(ds *[]*dentry) {
|
|
// Allow dentries to be GC'd.
|
|
for i := range *ds {
|
|
(*ds)[i] = nil
|
|
}
|
|
*ds = (*ds)[:0]
|
|
dentrySlicePool.Put(ds)
|
|
}
|
|
|
|
// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
|
|
// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
|
|
// writing.
|
|
//
|
|
// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
|
|
// but dentry slices are allocated lazily, and it's much easier to say "defer
|
|
// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
|
|
// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
|
|
func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
|
|
fs.renameMu.RUnlock()
|
|
if *ds == nil {
|
|
return
|
|
}
|
|
if len(**ds) != 0 {
|
|
fs.renameMu.Lock()
|
|
for _, d := range **ds {
|
|
d.checkDropLocked(ctx)
|
|
}
|
|
fs.renameMu.Unlock()
|
|
}
|
|
putDentrySlice(*ds)
|
|
}
|
|
|
|
func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
|
|
if *ds == nil {
|
|
fs.renameMu.Unlock()
|
|
return
|
|
}
|
|
for _, d := range **ds {
|
|
d.checkDropLocked(ctx)
|
|
}
|
|
fs.renameMu.Unlock()
|
|
putDentrySlice(*ds)
|
|
}
|
|
|
|
// stepLocked resolves rp.Component() to an existing file, starting from the
|
|
// given directory.
|
|
//
|
|
// Dentries which may have a reference count of zero, and which therefore
|
|
// should be dropped once traversal is complete, are appended to ds.
|
|
//
|
|
// Preconditions:
|
|
// * fs.renameMu must be locked.
|
|
// * d.dirMu must be locked.
|
|
// * !rp.Done().
|
|
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
|
|
if !d.isDir() {
|
|
return nil, syserror.ENOTDIR
|
|
}
|
|
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
|
|
return nil, err
|
|
}
|
|
afterSymlink:
|
|
name := rp.Component()
|
|
if name == "." {
|
|
rp.Advance()
|
|
return d, nil
|
|
}
|
|
if name == ".." {
|
|
if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
|
|
return nil, err
|
|
} else if isRoot || d.parent == nil {
|
|
rp.Advance()
|
|
return d, nil
|
|
}
|
|
if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
|
|
return nil, err
|
|
}
|
|
rp.Advance()
|
|
return d.parent, nil
|
|
}
|
|
child, err := fs.getChildLocked(ctx, d, name, ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
|
|
return nil, err
|
|
}
|
|
if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
|
|
target, err := child.readlink(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := rp.HandleSymlink(target); err != nil {
|
|
return nil, err
|
|
}
|
|
goto afterSymlink // don't check the current directory again
|
|
}
|
|
rp.Advance()
|
|
return child, nil
|
|
}
|
|
|
|
// Preconditions:
|
|
// * fs.renameMu must be locked.
|
|
// * d.dirMu must be locked.
|
|
func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
|
|
if child, ok := parent.children[name]; ok {
|
|
return child, nil
|
|
}
|
|
child, err := fs.lookupLocked(ctx, parent, name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if parent.children == nil {
|
|
parent.children = make(map[string]*dentry)
|
|
}
|
|
parent.children[name] = child
|
|
// child's refcount is initially 0, so it may be dropped after traversal.
|
|
*ds = appendDentry(*ds, child)
|
|
return child, nil
|
|
}
|
|
|
|
// Preconditions:
|
|
// * fs.renameMu must be locked.
|
|
// * parent.dirMu must be locked.
|
|
func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) {
|
|
childPath := fspath.Parse(name)
|
|
child := fs.newDentry()
|
|
existsOnAnyLayer := false
|
|
var lookupErr error
|
|
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool {
|
|
childVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: parentVD,
|
|
Start: parentVD,
|
|
Path: childPath,
|
|
}, &vfs.GetDentryOptions{})
|
|
if err == syserror.ENOENT || err == syserror.ENAMETOOLONG {
|
|
// The file doesn't exist on this layer. Proceed to the next one.
|
|
return true
|
|
}
|
|
if err != nil {
|
|
lookupErr = err
|
|
return false
|
|
}
|
|
defer childVD.DecRef(ctx)
|
|
|
|
mask := uint32(linux.STATX_TYPE)
|
|
if !existsOnAnyLayer {
|
|
// Mode, UID, GID, and (for non-directories) inode number come from
|
|
// the topmost layer on which the file exists.
|
|
mask |= linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
|
|
}
|
|
stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: childVD,
|
|
Start: childVD,
|
|
}, &vfs.StatOptions{
|
|
Mask: mask,
|
|
})
|
|
if err != nil {
|
|
lookupErr = err
|
|
return false
|
|
}
|
|
if stat.Mask&mask != mask {
|
|
lookupErr = syserror.EREMOTE
|
|
return false
|
|
}
|
|
|
|
if isWhiteout(&stat) {
|
|
// This is a whiteout, so it "doesn't exist" on this layer, and
|
|
// layers below this one are ignored.
|
|
return false
|
|
}
|
|
isDir := stat.Mode&linux.S_IFMT == linux.S_IFDIR
|
|
if existsOnAnyLayer && !isDir {
|
|
// Directories are not merged with non-directory files from lower
|
|
// layers; instead, layers including and below the first
|
|
// non-directory file are ignored. (This file must be a directory
|
|
// on previous layers, since lower layers aren't searched for
|
|
// non-directory files.)
|
|
return false
|
|
}
|
|
|
|
// Update child to include this layer.
|
|
childVD.IncRef()
|
|
if isUpper {
|
|
child.upperVD = childVD
|
|
child.copiedUp = 1
|
|
} else {
|
|
child.lowerVDs = append(child.lowerVDs, childVD)
|
|
}
|
|
if !existsOnAnyLayer {
|
|
existsOnAnyLayer = true
|
|
child.mode = uint32(stat.Mode)
|
|
child.uid = stat.UID
|
|
child.gid = stat.GID
|
|
child.devMajor = stat.DevMajor
|
|
child.devMinor = stat.DevMinor
|
|
child.ino = stat.Ino
|
|
}
|
|
|
|
// For non-directory files, only the topmost layer that contains a file
|
|
// matters.
|
|
if !isDir {
|
|
return false
|
|
}
|
|
|
|
// Directories are merged with directories from lower layers if they
|
|
// are not explicitly opaque.
|
|
opaqueVal, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: childVD,
|
|
Start: childVD,
|
|
}, &vfs.GetXattrOptions{
|
|
Name: _OVL_XATTR_OPAQUE,
|
|
Size: 1,
|
|
})
|
|
return !(err == nil && opaqueVal == "y")
|
|
})
|
|
|
|
if lookupErr != nil {
|
|
child.destroyLocked(ctx)
|
|
return nil, lookupErr
|
|
}
|
|
if !existsOnAnyLayer {
|
|
child.destroyLocked(ctx)
|
|
return nil, syserror.ENOENT
|
|
}
|
|
|
|
// Device and inode numbers were copied from the topmost layer above;
|
|
// override them if necessary.
|
|
if child.isDir() {
|
|
child.devMajor = linux.UNNAMED_MAJOR
|
|
child.devMinor = fs.dirDevMinor
|
|
child.ino = fs.newDirIno()
|
|
} else if !child.upperVD.Ok() {
|
|
child.devMajor = linux.UNNAMED_MAJOR
|
|
child.devMinor = fs.lowerDevMinors[child.lowerVDs[0].Mount().Filesystem()]
|
|
}
|
|
|
|
parent.IncRef()
|
|
child.parent = parent
|
|
child.name = name
|
|
return child, nil
|
|
}
|
|
|
|
// lookupLayerLocked is similar to lookupLocked, but only returns information
|
|
// about the file rather than a dentry.
|
|
//
|
|
// Preconditions:
|
|
// * fs.renameMu must be locked.
|
|
// * parent.dirMu must be locked.
|
|
func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, name string) (lookupLayer, error) {
|
|
childPath := fspath.Parse(name)
|
|
lookupLayer := lookupLayerNone
|
|
var lookupErr error
|
|
|
|
parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool {
|
|
stat, err := fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: parentVD,
|
|
Start: parentVD,
|
|
Path: childPath,
|
|
}, &vfs.StatOptions{
|
|
Mask: linux.STATX_TYPE,
|
|
})
|
|
if err == syserror.ENOENT || err == syserror.ENAMETOOLONG {
|
|
// The file doesn't exist on this layer. Proceed to the next
|
|
// one.
|
|
return true
|
|
}
|
|
if err != nil {
|
|
lookupErr = err
|
|
return false
|
|
}
|
|
if stat.Mask&linux.STATX_TYPE == 0 {
|
|
// Linux's overlayfs tends to return EREMOTE in cases where a file
|
|
// is unusable for reasons that are not better captured by another
|
|
// errno.
|
|
lookupErr = syserror.EREMOTE
|
|
return false
|
|
}
|
|
if isWhiteout(&stat) {
|
|
// This is a whiteout, so it "doesn't exist" on this layer, and
|
|
// layers below this one are ignored.
|
|
if isUpper {
|
|
lookupLayer = lookupLayerUpperWhiteout
|
|
}
|
|
return false
|
|
}
|
|
// The file exists; we can stop searching.
|
|
if isUpper {
|
|
lookupLayer = lookupLayerUpper
|
|
} else {
|
|
lookupLayer = lookupLayerLower
|
|
}
|
|
return false
|
|
})
|
|
|
|
return lookupLayer, lookupErr
|
|
}
|
|
|
|
// lookupLayer classifies the result of lookupLayerLocked.
type lookupLayer int

const (
	// lookupLayerNone means there is no file at the path on the upper layer,
	// and the path is either whited out or absent on the lower layers. The
	// file therefore does not exist in the overlay filesystem, and file
	// creation may proceed normally (if an upper layer exists).
	lookupLayerNone lookupLayer = iota

	// lookupLayerLower means there is no file at the path on the upper
	// layer, but one exists on a lower layer. The file therefore exists in
	// the overlay filesystem, but must be copied-up before mutation.
	lookupLayerLower

	// lookupLayerUpper means a non-whiteout file exists at the path on the
	// upper layer. The file therefore exists in the overlay filesystem, and
	// is already copied-up.
	lookupLayerUpper

	// lookupLayerUpperWhiteout means a whiteout exists at the path on the
	// upper layer. The file therefore does not exist in the overlay
	// filesystem, and file creation must remove the whiteout before
	// proceeding.
	lookupLayerUpperWhiteout
)

// existsInOverlay reports whether ll denotes a file that is visible in the
// overlay filesystem, i.e. one found on a lower or on the upper layer.
func (ll lookupLayer) existsInOverlay() bool {
	switch ll {
	case lookupLayerLower, lookupLayerUpper:
		return true
	default:
		return false
	}
}
|
|
|
|
// walkParentDirLocked resolves all but the last path component of rp to an
|
|
// existing directory, starting from the given directory (which is usually
|
|
// rp.Start().Impl().(*dentry)). It does not check that the returned directory
|
|
// is searchable by the provider of rp.
|
|
//
|
|
// Preconditions:
|
|
// * fs.renameMu must be locked.
|
|
// * !rp.Done().
|
|
func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
|
|
for !rp.Final() {
|
|
d.dirMu.Lock()
|
|
next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
|
|
d.dirMu.Unlock()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d = next
|
|
}
|
|
if !d.isDir() {
|
|
return nil, syserror.ENOTDIR
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// resolveLocked resolves rp to an existing file.
|
|
//
|
|
// Preconditions: fs.renameMu must be locked.
|
|
func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
|
|
d := rp.Start().Impl().(*dentry)
|
|
for !rp.Done() {
|
|
d.dirMu.Lock()
|
|
next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
|
|
d.dirMu.Unlock()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d = next
|
|
}
|
|
if rp.MustBeDir() && !d.isDir() {
|
|
return nil, syserror.ENOTDIR
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// doCreateAt checks that creating a file at rp is permitted, then invokes
|
|
// create to do so.
|
|
//
|
|
// Preconditions:
|
|
// * !rp.Done().
|
|
// * For the final path component in rp, !rp.ShouldFollowSymlink().
|
|
func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
start := rp.Start().Impl().(*dentry)
|
|
parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
|
|
return err
|
|
}
|
|
name := rp.Component()
|
|
if name == "." || name == ".." {
|
|
return syserror.EEXIST
|
|
}
|
|
if !dir && rp.MustBeDir() {
|
|
return syserror.ENOENT
|
|
}
|
|
if parent.vfsd.IsDead() {
|
|
return syserror.ENOENT
|
|
}
|
|
mnt := rp.Mount()
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer mnt.EndWrite()
|
|
parent.dirMu.Lock()
|
|
defer parent.dirMu.Unlock()
|
|
|
|
// Determine if a file already exists at name.
|
|
if _, ok := parent.children[name]; ok {
|
|
return syserror.EEXIST
|
|
}
|
|
childLayer, err := fs.lookupLayerLocked(ctx, parent, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if childLayer.existsInOverlay() {
|
|
return syserror.EEXIST
|
|
}
|
|
|
|
// Ensure that the parent directory is copied-up so that we can create the
|
|
// new file in the upper layer.
|
|
if err := parent.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Finally create the new file.
|
|
if err := create(parent, name, childLayer == lookupLayerUpperWhiteout); err != nil {
|
|
return err
|
|
}
|
|
parent.dirents = nil
|
|
return nil
|
|
}
|
|
|
|
// Preconditions: pop's parent directory has been copied up.
|
|
func (fs *filesystem) createWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) error {
|
|
return vfsObj.MknodAt(ctx, fs.creds, pop, &vfs.MknodOptions{
|
|
Mode: linux.S_IFCHR, // permissions == include/linux/fs.h:WHITEOUT_MODE == 0
|
|
// DevMajor == DevMinor == 0, from include/linux/fs.h:WHITEOUT_DEV
|
|
})
|
|
}
|
|
|
|
func (fs *filesystem) cleanupRecreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) {
|
|
if err := fs.createWhiteout(ctx, vfsObj, pop); err != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to recreate whiteout after failed file creation: %v", err)
|
|
}
|
|
}
|
|
|
|
// AccessAt implements vfs.Filesystem.Impl.AccessAt.
|
|
func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return d.checkPermissions(creds, ats)
|
|
}
|
|
|
|
// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
|
|
func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
|
|
return nil, err
|
|
}
|
|
layerVD := d.topLayer()
|
|
return fs.vfsfs.VirtualFilesystem().BoundEndpointAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: layerVD,
|
|
Start: layerVD,
|
|
}, &opts)
|
|
}
|
|
|
|
// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
|
|
func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if opts.CheckSearchable {
|
|
if !d.isDir() {
|
|
return nil, syserror.ENOTDIR
|
|
}
|
|
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
d.IncRef()
|
|
return &d.vfsd, nil
|
|
}
|
|
|
|
// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
|
|
func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
start := rp.Start().Impl().(*dentry)
|
|
d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.IncRef()
|
|
return &d.vfsd, nil
|
|
}
|
|
|
|
// LinkAt implements vfs.FilesystemImpl.LinkAt.
|
|
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
|
|
return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
|
|
if rp.Mount() != vd.Mount() {
|
|
return syserror.EXDEV
|
|
}
|
|
old := vd.Dentry().Impl().(*dentry)
|
|
if old.isDir() {
|
|
return syserror.EPERM
|
|
}
|
|
if err := old.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
newpop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(childName),
|
|
}
|
|
if haveUpperWhiteout {
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: old.upperVD,
|
|
Start: old.upperVD,
|
|
}, &newpop); err != nil {
|
|
if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
|
|
}
|
|
return err
|
|
}
|
|
creds := rp.Credentials()
|
|
if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{
|
|
Stat: linux.Statx{
|
|
Mask: linux.STATX_UID | linux.STATX_GID,
|
|
UID: uint32(creds.EffectiveKUID),
|
|
GID: uint32(creds.EffectiveKGID),
|
|
},
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
|
|
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
|
|
return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(childName),
|
|
}
|
|
if haveUpperWhiteout {
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil {
|
|
if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
creds := rp.Credentials()
|
|
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
|
|
Stat: linux.Statx{
|
|
Mask: linux.STATX_UID | linux.STATX_GID,
|
|
UID: uint32(creds.EffectiveKUID),
|
|
GID: uint32(creds.EffectiveKGID),
|
|
},
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
if haveUpperWhiteout {
|
|
// There may be directories on lower layers (previously hidden by
|
|
// the whiteout) that the new directory should not be merged with.
|
|
// Mark it opaque to prevent merging.
|
|
if err := vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{
|
|
Name: _OVL_XATTR_OPAQUE,
|
|
Value: "y",
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt set-opaque failure: %v", cleanupErr)
|
|
} else {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// MknodAt implements vfs.FilesystemImpl.MknodAt.
|
|
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
|
|
return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
|
|
// Disallow attempts to create whiteouts.
|
|
if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 {
|
|
return syserror.EPERM
|
|
}
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(childName),
|
|
}
|
|
if haveUpperWhiteout {
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil {
|
|
if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
creds := rp.Credentials()
|
|
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
|
|
Stat: linux.Statx{
|
|
Mask: linux.STATX_UID | linux.STATX_GID,
|
|
UID: uint32(creds.EffectiveKUID),
|
|
GID: uint32(creds.EffectiveKGID),
|
|
},
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// OpenAt implements vfs.FilesystemImpl.OpenAt.
|
|
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
|
|
mayCreate := opts.Flags&linux.O_CREAT != 0
|
|
mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
|
|
mayWrite := vfs.AccessTypesForOpenFlags(&opts).MayWrite()
|
|
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
unlocked := false
|
|
unlock := func() {
|
|
if !unlocked {
|
|
fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
unlocked = true
|
|
}
|
|
}
|
|
defer unlock()
|
|
|
|
start := rp.Start().Impl().(*dentry)
|
|
if rp.Done() {
|
|
if mayCreate && rp.MustBeDir() {
|
|
return nil, syserror.EISDIR
|
|
}
|
|
if mustCreate {
|
|
return nil, syserror.EEXIST
|
|
}
|
|
if mayWrite {
|
|
if err := start.copyUpLocked(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
start.IncRef()
|
|
defer start.DecRef(ctx)
|
|
unlock()
|
|
return start.openCopiedUp(ctx, rp, &opts)
|
|
}
|
|
|
|
afterTrailingSymlink:
|
|
parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Check for search permission in the parent directory.
|
|
if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
|
|
return nil, err
|
|
}
|
|
// Reject attempts to open directories with O_CREAT.
|
|
if mayCreate && rp.MustBeDir() {
|
|
return nil, syserror.EISDIR
|
|
}
|
|
// Determine whether or not we need to create a file.
|
|
parent.dirMu.Lock()
|
|
child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
|
|
if err == syserror.ENOENT && mayCreate {
|
|
fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds)
|
|
parent.dirMu.Unlock()
|
|
return fd, err
|
|
}
|
|
parent.dirMu.Unlock()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Open existing child or follow symlink.
|
|
if mustCreate {
|
|
return nil, syserror.EEXIST
|
|
}
|
|
if child.isSymlink() && rp.ShouldFollowSymlink() {
|
|
target, err := child.readlink(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := rp.HandleSymlink(target); err != nil {
|
|
return nil, err
|
|
}
|
|
start = parent
|
|
goto afterTrailingSymlink
|
|
}
|
|
if rp.MustBeDir() && !child.isDir() {
|
|
return nil, syserror.ENOTDIR
|
|
}
|
|
if mayWrite {
|
|
if err := child.copyUpLocked(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
child.IncRef()
|
|
defer child.DecRef(ctx)
|
|
unlock()
|
|
return child.openCopiedUp(ctx, rp, &opts)
|
|
}
|
|
|
|
// Preconditions: If vfs.AccessTypesForOpenFlags(opts).MayWrite(), then d has
|
|
// been copied up.
|
|
func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
|
|
ats := vfs.AccessTypesForOpenFlags(opts)
|
|
if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
|
|
return nil, err
|
|
}
|
|
mnt := rp.Mount()
|
|
|
|
// Directory FDs open FDs from each layer when directory entries are read,
|
|
// so they don't require opening an FD from d.topLayer() up front.
|
|
ftype := atomic.LoadUint32(&d.mode) & linux.S_IFMT
|
|
if ftype == linux.S_IFDIR {
|
|
// Can't open directories with O_CREAT.
|
|
if opts.Flags&linux.O_CREAT != 0 {
|
|
return nil, syserror.EISDIR
|
|
}
|
|
// Can't open directories writably.
|
|
if ats.MayWrite() {
|
|
return nil, syserror.EISDIR
|
|
}
|
|
if opts.Flags&linux.O_DIRECT != 0 {
|
|
return nil, syserror.EINVAL
|
|
}
|
|
fd := &directoryFD{}
|
|
fd.LockFD.Init(&d.locks)
|
|
if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
|
|
UseDentryMetadata: true,
|
|
}); err != nil {
|
|
return nil, err
|
|
}
|
|
return &fd.vfsfd, nil
|
|
}
|
|
|
|
layerVD, isUpper := d.topLayerInfo()
|
|
layerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
|
|
Root: layerVD,
|
|
Start: layerVD,
|
|
}, opts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layerFlags := layerFD.StatusFlags()
|
|
fd := &nonDirectoryFD{
|
|
copiedUp: isUpper,
|
|
cachedFD: layerFD,
|
|
cachedFlags: layerFlags,
|
|
}
|
|
fd.LockFD.Init(&d.locks)
|
|
layerFDOpts := layerFD.Options()
|
|
if err := fd.vfsfd.Init(fd, layerFlags, mnt, &d.vfsd, &layerFDOpts); err != nil {
|
|
layerFD.DecRef(ctx)
|
|
return nil, err
|
|
}
|
|
return &fd.vfsfd, nil
|
|
}
|
|
|
|
// Preconditions:
|
|
// * parent.dirMu must be locked.
|
|
// * parent does not already contain a child named rp.Component().
|
|
func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
|
|
creds := rp.Credentials()
|
|
if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil {
|
|
return nil, err
|
|
}
|
|
if parent.vfsd.IsDead() {
|
|
return nil, syserror.ENOENT
|
|
}
|
|
mnt := rp.Mount()
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return nil, err
|
|
}
|
|
defer mnt.EndWrite()
|
|
|
|
if err := parent.copyUpLocked(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
childName := rp.Component()
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(childName),
|
|
}
|
|
// We don't know if a whiteout exists on the upper layer; speculatively
|
|
// unlink it.
|
|
//
|
|
// TODO(gvisor.dev/issue/1199): Modify OpenAt => stepLocked so that we do
|
|
// know whether a whiteout exists.
|
|
var haveUpperWhiteout bool
|
|
switch err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err {
|
|
case nil:
|
|
haveUpperWhiteout = true
|
|
case syserror.ENOENT:
|
|
haveUpperWhiteout = false
|
|
default:
|
|
return nil, err
|
|
}
|
|
// Create the file on the upper layer, and get an FD representing it.
|
|
upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{
|
|
Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL,
|
|
Mode: opts.Mode,
|
|
})
|
|
if err != nil {
|
|
if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return nil, err
|
|
}
|
|
// Change the file's owner to the caller. We can't use upperFD.SetStat()
|
|
// because it will pick up creds from ctx.
|
|
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
|
|
Stat: linux.Statx{
|
|
Mask: linux.STATX_UID | linux.STATX_GID,
|
|
UID: uint32(creds.EffectiveKUID),
|
|
GID: uint32(creds.EffectiveKGID),
|
|
},
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return nil, err
|
|
}
|
|
// Re-lookup to get a dentry representing the new file, which is needed for
|
|
// the returned FD.
|
|
child, err := fs.getChildLocked(ctx, parent, childName, ds)
|
|
if err != nil {
|
|
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) dentry lookup failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return nil, err
|
|
}
|
|
// Finally construct the overlay FD.
|
|
upperFlags := upperFD.StatusFlags()
|
|
fd := &nonDirectoryFD{
|
|
copiedUp: true,
|
|
cachedFD: upperFD,
|
|
cachedFlags: upperFlags,
|
|
}
|
|
fd.LockFD.Init(&child.locks)
|
|
upperFDOpts := upperFD.Options()
|
|
if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil {
|
|
upperFD.DecRef(ctx)
|
|
// Don't bother with cleanup; the file was created successfully, we
|
|
// just can't open it anymore for some reason.
|
|
return nil, err
|
|
}
|
|
return &fd.vfsfd, nil
|
|
}
|
|
|
|
// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
|
|
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
layerVD := d.topLayer()
|
|
return fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
|
|
Root: layerVD,
|
|
Start: layerVD,
|
|
})
|
|
}
|
|
|
|
// RenameAt implements vfs.FilesystemImpl.RenameAt.
|
|
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
|
|
if opts.Flags != 0 {
|
|
return syserror.EINVAL
|
|
}
|
|
|
|
var ds *[]*dentry
|
|
fs.renameMu.Lock()
|
|
defer fs.renameMuUnlockAndCheckDrop(ctx, &ds)
|
|
newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
newName := rp.Component()
|
|
if newName == "." || newName == ".." {
|
|
return syserror.EBUSY
|
|
}
|
|
mnt := rp.Mount()
|
|
if mnt != oldParentVD.Mount() {
|
|
return syserror.EXDEV
|
|
}
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer mnt.EndWrite()
|
|
|
|
// FIXME(gvisor.dev/issue/1199): Actually implement rename.
|
|
_ = newParent
|
|
return syserror.EXDEV
|
|
}
|
|
|
|
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
|
|
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
start := rp.Start().Impl().(*dentry)
|
|
parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
|
|
return err
|
|
}
|
|
if err := rp.Mount().CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer rp.Mount().EndWrite()
|
|
name := rp.Component()
|
|
if name == "." {
|
|
return syserror.EINVAL
|
|
}
|
|
if name == ".." {
|
|
return syserror.ENOTEMPTY
|
|
}
|
|
vfsObj := rp.VirtualFilesystem()
|
|
mntns := vfs.MountNamespaceFromContext(ctx)
|
|
defer mntns.DecRef(ctx)
|
|
parent.dirMu.Lock()
|
|
defer parent.dirMu.Unlock()
|
|
|
|
// Ensure that parent is copied-up before potentially holding child.copyMu
|
|
// below.
|
|
if err := parent.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Unlike UnlinkAt, we need a dentry representing the child directory being
|
|
// removed in order to verify that it's empty.
|
|
child, err := fs.getChildLocked(ctx, parent, name, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !child.isDir() {
|
|
return syserror.ENOTDIR
|
|
}
|
|
child.dirMu.Lock()
|
|
defer child.dirMu.Unlock()
|
|
whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
child.copyMu.RLock()
|
|
defer child.copyMu.RUnlock()
|
|
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
|
|
return err
|
|
}
|
|
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(name),
|
|
}
|
|
if child.upperVD.Ok() {
|
|
cleanupRecreateWhiteouts := func() {
|
|
if !child.upperVD.Ok() {
|
|
return
|
|
}
|
|
for whiteoutName, whiteoutUpper := range whiteouts {
|
|
if !whiteoutUpper {
|
|
continue
|
|
}
|
|
if err := fs.createWhiteout(ctx, vfsObj, &vfs.PathOperation{
|
|
Root: child.upperVD,
|
|
Start: child.upperVD,
|
|
Path: fspath.Parse(whiteoutName),
|
|
}); err != nil && err != syserror.EEXIST {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err)
|
|
}
|
|
}
|
|
}
|
|
// Remove existing whiteouts on the upper layer.
|
|
for whiteoutName, whiteoutUpper := range whiteouts {
|
|
if !whiteoutUpper {
|
|
continue
|
|
}
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: child.upperVD,
|
|
Start: child.upperVD,
|
|
Path: fspath.Parse(whiteoutName),
|
|
}); err != nil {
|
|
cleanupRecreateWhiteouts()
|
|
vfsObj.AbortDeleteDentry(&child.vfsd)
|
|
return err
|
|
}
|
|
}
|
|
// Remove the existing directory on the upper layer.
|
|
if err := vfsObj.RmdirAt(ctx, fs.creds, &pop); err != nil {
|
|
cleanupRecreateWhiteouts()
|
|
vfsObj.AbortDeleteDentry(&child.vfsd)
|
|
return err
|
|
}
|
|
}
|
|
if err := fs.createWhiteout(ctx, vfsObj, &pop); err != nil {
|
|
// Don't attempt to recover from this: the original directory is
|
|
// already gone, so any dentries representing it are invalid, and
|
|
// creating a new directory won't undo that.
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to create whiteout during RmdirAt: %v", err)
|
|
vfsObj.AbortDeleteDentry(&child.vfsd)
|
|
return err
|
|
}
|
|
|
|
vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
|
|
delete(parent.children, name)
|
|
ds = appendDentry(ds, child)
|
|
parent.dirents = nil
|
|
return nil
|
|
}
|
|
|
|
// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
|
|
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
|
|
if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
|
|
return err
|
|
}
|
|
mnt := rp.Mount()
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer mnt.EndWrite()
|
|
if err := d.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
// Changes to d's attributes are serialized by d.copyMu.
|
|
d.copyMu.Lock()
|
|
defer d.copyMu.Unlock()
|
|
if err := d.fs.vfsfs.VirtualFilesystem().SetStatAt(ctx, d.fs.creds, &vfs.PathOperation{
|
|
Root: d.upperVD,
|
|
Start: d.upperVD,
|
|
}, &opts); err != nil {
|
|
return err
|
|
}
|
|
d.updateAfterSetStatLocked(&opts)
|
|
return nil
|
|
}
|
|
|
|
// StatAt implements vfs.FilesystemImpl.StatAt.
|
|
func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return linux.Statx{}, err
|
|
}
|
|
|
|
var stat linux.Statx
|
|
if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
|
|
layerVD := d.topLayer()
|
|
stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
|
|
Root: layerVD,
|
|
Start: layerVD,
|
|
}, &vfs.StatOptions{
|
|
Mask: layerMask,
|
|
Sync: opts.Sync,
|
|
})
|
|
if err != nil {
|
|
return linux.Statx{}, err
|
|
}
|
|
}
|
|
d.statInternalTo(ctx, &opts, &stat)
|
|
return stat, nil
|
|
}
|
|
|
|
// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
|
|
func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
_, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return linux.Statfs{}, err
|
|
}
|
|
return fs.statFS(ctx)
|
|
}
|
|
|
|
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
|
|
func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
|
|
return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
|
|
vfsObj := fs.vfsfs.VirtualFilesystem()
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(childName),
|
|
}
|
|
if haveUpperWhiteout {
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil {
|
|
if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
creds := rp.Credentials()
|
|
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
|
|
Stat: linux.Statx{
|
|
Mask: linux.STATX_UID | linux.STATX_GID,
|
|
UID: uint32(creds.EffectiveKUID),
|
|
GID: uint32(creds.EffectiveKGID),
|
|
},
|
|
}); err != nil {
|
|
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr)
|
|
} else if haveUpperWhiteout {
|
|
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
|
|
func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
start := rp.Start().Impl().(*dentry)
|
|
parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
|
|
return err
|
|
}
|
|
if err := rp.Mount().CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer rp.Mount().EndWrite()
|
|
name := rp.Component()
|
|
if name == "." || name == ".." {
|
|
return syserror.EISDIR
|
|
}
|
|
if rp.MustBeDir() {
|
|
return syserror.ENOTDIR
|
|
}
|
|
vfsObj := rp.VirtualFilesystem()
|
|
mntns := vfs.MountNamespaceFromContext(ctx)
|
|
defer mntns.DecRef(ctx)
|
|
parent.dirMu.Lock()
|
|
defer parent.dirMu.Unlock()
|
|
|
|
// Ensure that parent is copied-up before potentially holding child.copyMu
|
|
// below.
|
|
if err := parent.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
child := parent.children[name]
|
|
var childLayer lookupLayer
|
|
if child != nil {
|
|
if child.isDir() {
|
|
return syserror.EISDIR
|
|
}
|
|
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
|
|
return err
|
|
}
|
|
// Hold child.copyMu to prevent it from being copied-up during
|
|
// deletion.
|
|
child.copyMu.RLock()
|
|
defer child.copyMu.RUnlock()
|
|
if child.upperVD.Ok() {
|
|
childLayer = lookupLayerUpper
|
|
} else {
|
|
childLayer = lookupLayerLower
|
|
}
|
|
} else {
|
|
// Determine if the file being unlinked actually exists. Holding
|
|
// parent.dirMu prevents a dentry from being instantiated for the file,
|
|
// which in turn prevents it from being copied-up, so this result is
|
|
// stable.
|
|
childLayer, err = fs.lookupLayerLocked(ctx, parent, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !childLayer.existsInOverlay() {
|
|
return syserror.ENOENT
|
|
}
|
|
}
|
|
|
|
pop := vfs.PathOperation{
|
|
Root: parent.upperVD,
|
|
Start: parent.upperVD,
|
|
Path: fspath.Parse(name),
|
|
}
|
|
if childLayer == lookupLayerUpper {
|
|
// Remove the existing file on the upper layer.
|
|
if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
|
|
if child != nil {
|
|
vfsObj.AbortDeleteDentry(&child.vfsd)
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
if err := fs.createWhiteout(ctx, vfsObj, &pop); err != nil {
|
|
ctx.Warningf("Unrecoverable overlayfs inconsistency: failed to create whiteout during UnlinkAt: %v", err)
|
|
if child != nil {
|
|
vfsObj.AbortDeleteDentry(&child.vfsd)
|
|
}
|
|
return err
|
|
}
|
|
|
|
if child != nil {
|
|
vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
|
|
delete(parent.children, name)
|
|
ds = appendDentry(ds, child)
|
|
}
|
|
parent.dirents = nil
|
|
return nil
|
|
}
|
|
|
|
// isOverlayXattr returns whether the given extended attribute configures the
|
|
// overlay.
|
|
func isOverlayXattr(name string) bool {
|
|
return strings.HasPrefix(name, _OVL_XATTR_PREFIX)
|
|
}
|
|
|
|
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
|
|
func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return fs.listXattr(ctx, d, size)
|
|
}
|
|
|
|
func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) {
|
|
vfsObj := d.fs.vfsfs.VirtualFilesystem()
|
|
top := d.topLayer()
|
|
names, err := vfsObj.ListXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Filter out all overlay attributes.
|
|
n := 0
|
|
for _, name := range names {
|
|
if !isOverlayXattr(name) {
|
|
names[n] = name
|
|
n++
|
|
}
|
|
}
|
|
return names[:n], err
|
|
}
|
|
|
|
// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
|
|
func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return fs.getXattr(ctx, d, rp.Credentials(), &opts)
|
|
}
|
|
|
|
func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
|
|
if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Return EOPNOTSUPP when fetching an overlay attribute.
|
|
// See fs/overlayfs/super.c:ovl_own_xattr_get().
|
|
if isOverlayXattr(opts.Name) {
|
|
return "", syserror.EOPNOTSUPP
|
|
}
|
|
|
|
// Analogous to fs/overlayfs/super.c:ovl_other_xattr_get().
|
|
vfsObj := d.fs.vfsfs.VirtualFilesystem()
|
|
top := d.topLayer()
|
|
return vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts)
|
|
}
|
|
|
|
// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
|
|
func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts)
|
|
}
|
|
|
|
// Precondition: fs.renameMu must be locked.
|
|
func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
|
|
if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Return EOPNOTSUPP when setting an overlay attribute.
|
|
// See fs/overlayfs/super.c:ovl_own_xattr_set().
|
|
if isOverlayXattr(opts.Name) {
|
|
return syserror.EOPNOTSUPP
|
|
}
|
|
|
|
// Analogous to fs/overlayfs/super.c:ovl_other_xattr_set().
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer mnt.EndWrite()
|
|
if err := d.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
vfsObj := d.fs.vfsfs.VirtualFilesystem()
|
|
return vfsObj.SetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts)
|
|
}
|
|
|
|
// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
|
|
func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
|
|
var ds *[]*dentry
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
|
|
d, err := fs.resolveLocked(ctx, rp, &ds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name)
|
|
}
|
|
|
|
// Precondition: fs.renameMu must be locked.
|
|
func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, name string) error {
|
|
if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Like SetXattrAt, return EOPNOTSUPP when removing an overlay attribute.
|
|
// Linux passes the remove request to xattr_handler->set.
|
|
// See fs/xattr.c:vfs_removexattr().
|
|
if isOverlayXattr(name) {
|
|
return syserror.EOPNOTSUPP
|
|
}
|
|
|
|
if err := mnt.CheckBeginWrite(); err != nil {
|
|
return err
|
|
}
|
|
defer mnt.EndWrite()
|
|
if err := d.copyUpLocked(ctx); err != nil {
|
|
return err
|
|
}
|
|
vfsObj := d.fs.vfsfs.VirtualFilesystem()
|
|
return vfsObj.RemoveXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name)
|
|
}
|
|
|
|
// PrependPath implements vfs.FilesystemImpl.PrependPath.
|
|
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
|
|
fs.renameMu.RLock()
|
|
defer fs.renameMu.RUnlock()
|
|
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
|
|
}
|