2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package fs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2018-07-26 22:54:55 +00:00
|
|
|
"strings"
|
2018-04-27 17:37:02 +00:00
|
|
|
"sync"
|
|
|
|
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/log"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/memmap"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/usermem"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
2019-06-27 21:22:40 +00:00
|
|
|
"gvisor.dev/gvisor/third_party/gvsync"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// The virtual filesystem implements an overlay configuration. For a high-level
|
|
|
|
// description, see README.md.
|
|
|
|
//
|
|
|
|
// Note on whiteouts:
|
|
|
|
//
|
|
|
|
// This implementation does not use the "Docker-style" whiteouts (symlinks with
|
|
|
|
// ".wh." prefix). Instead upper filesystem directories support a set of extended
|
|
|
|
// attributes to encode whiteouts: "trusted.overlay.whiteout.<filename>". This
|
|
|
|
// gives flexibility to persist whiteouts independently of the filesystem layout
|
|
|
|
// while additionally preventing name conflicts with files prefixed with ".wh.".
|
|
|
|
//
|
|
|
|
// Known deficiencies:
|
|
|
|
//
|
|
|
|
// - The device number of two files under the same overlay mount point may be
|
|
|
|
// different. This can happen if a file is found in the lower filesystem (takes
|
|
|
|
// the lower filesystem device) and another file is created in the upper
|
|
|
|
// filesystem (takes the upper filesystem device). This may appear odd but
|
|
|
|
// should not break applications.
|
|
|
|
//
|
|
|
|
// - Registered events on files (i.e. for notification of read/write readiness)
|
|
|
|
// are not copied across copy up. This is fine in the common case of files that
|
|
|
|
// do not block. For files that do block, like pipes and sockets, copy up is not
|
|
|
|
// supported.
|
|
|
|
//
|
|
|
|
// - Hardlinks in a lower filesystem are broken by copy up. For this reason, no
|
|
|
|
// attempt is made to preserve link count across copy up.
|
|
|
|
//
|
|
|
|
// - The maximum length of an extended attribute name is the same as the maximum
|
|
|
|
// length of a file name in Linux (XATTR_NAME_MAX == NAME_MAX). This means that
|
|
|
|
// whiteout attributes, if set directly on the host, are limited additionally by
|
|
|
|
// the extra whiteout prefix length (file names must be strictly shorter than
|
|
|
|
// NAME_MAX). This is not a problem for in-memory filesystems which don't enforce
|
|
|
|
// XATTR_NAME_MAX.
|
|
|
|
|
|
|
|
// Extended attribute names used to configure overlay behavior.
const (
	// XattrOverlayPrefix prefixes every extended attribute that affects how
	// an overlay behaves.
	XattrOverlayPrefix = "trusted.overlay."

	// XattrOverlayWhiteoutPrefix prefixes the extended attributes that mark
	// the existence of a whiteout.
	XattrOverlayWhiteoutPrefix = XattrOverlayPrefix + "whiteout."
)
|
|
|
|
|
|
|
|
// XattrOverlayWhiteout returns an extended attribute that indicates a
|
|
|
|
// whiteout exists for name. It is supported by directories that wish to
|
|
|
|
// mask the existence of name.
|
|
|
|
func XattrOverlayWhiteout(name string) string {
|
|
|
|
return XattrOverlayWhiteoutPrefix + name
|
|
|
|
}
|
|
|
|
|
2018-07-26 22:54:55 +00:00
|
|
|
// isXattrOverlay returns whether the given extended attribute configures the
|
|
|
|
// overlay.
|
|
|
|
func isXattrOverlay(name string) bool {
|
|
|
|
return strings.HasPrefix(name, XattrOverlayPrefix)
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// NewOverlayRoot produces the root of an overlay.
|
|
|
|
//
|
|
|
|
// Preconditions:
|
|
|
|
//
|
|
|
|
// - upper and lower must be non-nil.
|
2018-08-11 00:15:27 +00:00
|
|
|
// - upper must not be an overlay.
|
2018-04-27 17:37:02 +00:00
|
|
|
// - lower should not expose character devices, pipes, or sockets, because
|
|
|
|
// copying up these types of files is not supported.
|
2018-08-11 00:15:27 +00:00
|
|
|
// - lower must not require that file objects be revalidated.
|
|
|
|
// - lower must not have dynamic file/directory content.
|
2018-04-27 17:37:02 +00:00
|
|
|
func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags MountSourceFlags) (*Inode, error) {
|
|
|
|
if !IsDir(upper.StableAttr) {
|
2018-12-05 20:45:35 +00:00
|
|
|
return nil, fmt.Errorf("upper Inode is a %v, not a directory", upper.StableAttr.Type)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
if !IsDir(lower.StableAttr) {
|
2018-12-05 20:45:35 +00:00
|
|
|
return nil, fmt.Errorf("lower Inode is a %v, not a directory", lower.StableAttr.Type)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
2018-08-11 00:15:27 +00:00
|
|
|
if upper.overlay != nil {
|
|
|
|
return nil, fmt.Errorf("cannot nest overlay in upper file of another overlay")
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2019-06-14 01:39:43 +00:00
|
|
|
msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags)
|
2018-04-27 17:37:02 +00:00
|
|
|
overlay, err := newOverlayEntry(ctx, upper, lower, true)
|
|
|
|
if err != nil {
|
|
|
|
msrc.DecRef()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return newOverlayInode(ctx, overlay, msrc), nil
|
|
|
|
}
|
|
|
|
|
2018-06-13 17:19:03 +00:00
|
|
|
// NewOverlayRootFile produces the root of an overlay that points to a file.
|
|
|
|
//
|
|
|
|
// Preconditions:
|
|
|
|
//
|
|
|
|
// - lower must be non-nil.
|
|
|
|
// - lower should not expose character devices, pipes, or sockets, because
|
|
|
|
// copying up these types of files is not supported. Neither it can be a dir.
|
|
|
|
// - lower must not require that file objects be revalidated.
|
|
|
|
// - lower must not have dynamic file/directory content.
|
|
|
|
func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, flags MountSourceFlags) (*Inode, error) {
|
2018-06-27 02:04:51 +00:00
|
|
|
if !IsRegular(lower.StableAttr) {
|
2018-06-13 17:19:03 +00:00
|
|
|
return nil, fmt.Errorf("lower Inode is not a regular file")
|
|
|
|
}
|
2019-06-14 01:39:43 +00:00
|
|
|
msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags)
|
2018-06-13 17:19:03 +00:00
|
|
|
overlay, err := newOverlayEntry(ctx, nil, lower, true)
|
|
|
|
if err != nil {
|
|
|
|
msrc.DecRef()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return newOverlayInode(ctx, overlay, msrc), nil
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// newOverlayInode creates a new Inode for an overlay.
|
|
|
|
func newOverlayInode(ctx context.Context, o *overlayEntry, msrc *MountSource) *Inode {
|
|
|
|
var inode *Inode
|
|
|
|
if o.upper != nil {
|
2019-06-14 01:39:43 +00:00
|
|
|
inode = NewInode(ctx, nil, msrc, o.upper.StableAttr)
|
2018-04-27 17:37:02 +00:00
|
|
|
} else {
|
2019-06-14 01:39:43 +00:00
|
|
|
inode = NewInode(ctx, nil, msrc, o.lower.StableAttr)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
inode.overlay = o
|
|
|
|
return inode
|
|
|
|
}
|
|
|
|
|
|
|
|
// overlayEntry is the overlay metadata of an Inode. It implements Mappable.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type overlayEntry struct {
|
|
|
|
// lowerExists is true if an Inode exists for this file in the lower
|
|
|
|
// filesystem. If lowerExists is true, then the overlay must create
|
|
|
|
// a whiteout entry when renaming and removing this entry to mask the
|
|
|
|
// lower Inode.
|
|
|
|
//
|
|
|
|
// Note that this is distinct from actually holding onto a non-nil
|
|
|
|
// lower Inode (below). The overlay does not need to keep a lower Inode
|
|
|
|
// around unless it needs to operate on it, but it always needs to know
|
|
|
|
// whether the lower Inode exists to correctly execute a rename or
|
|
|
|
// remove operation.
|
|
|
|
lowerExists bool
|
|
|
|
|
|
|
|
// lower is an Inode from a lower filesystem. Modifications are
|
|
|
|
// never made on this Inode.
|
|
|
|
lower *Inode
|
|
|
|
|
|
|
|
// copyMu serializes copy-up for operations above
|
|
|
|
// mm.MemoryManager.mappingMu in the lock order.
|
|
|
|
copyMu sync.RWMutex `state:"nosave"`
|
|
|
|
|
|
|
|
// mapsMu serializes copy-up for operations between
|
|
|
|
// mm.MemoryManager.mappingMu and mm.MemoryManager.activeMu in the lock
|
|
|
|
// order.
|
|
|
|
mapsMu sync.Mutex `state:"nosave"`
|
|
|
|
|
|
|
|
// mappings tracks memory mappings of this Mappable so they can be removed
|
|
|
|
// from the lower filesystem Mappable and added to the upper filesystem
|
|
|
|
// Mappable when copy up occurs. It is strictly unnecessary after copy-up.
|
|
|
|
//
|
|
|
|
// mappings is protected by mapsMu.
|
|
|
|
mappings memmap.MappingSet
|
|
|
|
|
|
|
|
// dataMu serializes copy-up for operations below mm.MemoryManager.activeMu
|
|
|
|
// in the lock order.
|
|
|
|
dataMu sync.RWMutex `state:"nosave"`
|
|
|
|
|
|
|
|
// upper is an Inode from an upper filesystem. It is non-nil if
|
|
|
|
// the file exists in the upper filesystem. It becomes non-nil
|
|
|
|
// when the Inode that owns this overlayEntry is modified.
|
|
|
|
//
|
|
|
|
// upper is protected by all of copyMu, mapsMu, and dataMu. Holding any of
|
|
|
|
// these locks is sufficient to read upper; holding all three for writing
|
|
|
|
// is required to mutate it.
|
|
|
|
upper *Inode
|
2019-06-27 21:22:40 +00:00
|
|
|
|
|
|
|
// dirCacheMu protects dirCache.
|
|
|
|
dirCacheMu gvsync.DowngradableRWMutex `state:"nosave"`
|
|
|
|
|
|
|
|
// dirCache is cache of DentAttrs from upper and lower Inodes.
|
|
|
|
dirCache *SortedDentryMap
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// newOverlayEntry returns a new overlayEntry.
|
|
|
|
func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExists bool) (*overlayEntry, error) {
|
|
|
|
if upper == nil && lower == nil {
|
|
|
|
panic("invalid overlayEntry, needs at least one Inode")
|
|
|
|
}
|
|
|
|
if upper != nil && upper.overlay != nil {
|
|
|
|
panic("nested writable layers are not supported")
|
|
|
|
}
|
|
|
|
// Check for supported lower filesystem types.
|
|
|
|
if lower != nil {
|
|
|
|
switch lower.StableAttr.Type {
|
|
|
|
case RegularFile, Directory, Symlink, Socket:
|
|
|
|
default:
|
|
|
|
// We don't support copying up from character devices,
|
|
|
|
// named pipes, or anything weird (like proc files).
|
|
|
|
log.Warningf("%s not supported in lower filesytem", lower.StableAttr.Type)
|
|
|
|
return nil, syserror.EINVAL
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &overlayEntry{
|
|
|
|
lowerExists: lowerExists,
|
|
|
|
lower: lower,
|
|
|
|
upper: upper,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (o *overlayEntry) release() {
|
|
|
|
// We drop a reference on upper and lower file system Inodes
|
|
|
|
// rather than releasing them, because in-memory filesystems
|
|
|
|
// may hold an extra reference to these Inodes so that they
|
|
|
|
// stay in memory.
|
|
|
|
if o.upper != nil {
|
|
|
|
o.upper.DecRef()
|
|
|
|
}
|
|
|
|
if o.lower != nil {
|
|
|
|
o.lower.DecRef()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// overlayUpperMountSource gives the upper mount of an overlay mount.
|
|
|
|
//
|
|
|
|
// The caller may not use this MountSource past the lifetime of overlayMountSource and may
|
|
|
|
// not call DecRef on it.
|
|
|
|
func overlayUpperMountSource(overlayMountSource *MountSource) *MountSource {
|
|
|
|
return overlayMountSource.MountSourceOperations.(*overlayMountSourceOperations).upper
|
|
|
|
}
|
|
|
|
|
|
|
|
// Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked.
|
|
|
|
func (o *overlayEntry) inodeLocked() *Inode {
|
|
|
|
if o.upper != nil {
|
|
|
|
return o.upper
|
|
|
|
}
|
|
|
|
return o.lower
|
|
|
|
}
|
|
|
|
|
|
|
|
// Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked.
|
|
|
|
func (o *overlayEntry) isMappableLocked() bool {
|
|
|
|
return o.inodeLocked().Mappable() != nil
|
|
|
|
}
|
|
|
|
|
2019-06-27 21:22:40 +00:00
|
|
|
// markDirectoryDirty marks any cached data dirty for this directory. This is
|
|
|
|
// necessary in order to ensure that this node does not retain stale state
|
|
|
|
// throughout its lifetime across multiple open directory handles.
|
|
|
|
//
|
|
|
|
// Currently this means invalidating any readdir caches.
|
|
|
|
func (o *overlayEntry) markDirectoryDirty() {
|
|
|
|
o.dirCacheMu.Lock()
|
|
|
|
o.dirCache = nil
|
|
|
|
o.dirCacheMu.Unlock()
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// AddMapping implements memmap.Mappable.AddMapping.
|
2018-12-12 21:09:10 +00:00
|
|
|
func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
|
2018-04-27 17:37:02 +00:00
|
|
|
o.mapsMu.Lock()
|
|
|
|
defer o.mapsMu.Unlock()
|
2018-12-12 21:09:10 +00:00
|
|
|
if err := o.inodeLocked().Mappable().AddMapping(ctx, ms, ar, offset, writable); err != nil {
|
2018-04-27 17:37:02 +00:00
|
|
|
return err
|
|
|
|
}
|
2018-12-12 21:09:10 +00:00
|
|
|
o.mappings.AddMapping(ms, ar, offset, writable)
|
2018-04-27 17:37:02 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveMapping implements memmap.Mappable.RemoveMapping.
|
2018-12-12 21:09:10 +00:00
|
|
|
func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
|
2018-04-27 17:37:02 +00:00
|
|
|
o.mapsMu.Lock()
|
|
|
|
defer o.mapsMu.Unlock()
|
2018-12-12 21:09:10 +00:00
|
|
|
o.inodeLocked().Mappable().RemoveMapping(ctx, ms, ar, offset, writable)
|
|
|
|
o.mappings.RemoveMapping(ms, ar, offset, writable)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CopyMapping implements memmap.Mappable.CopyMapping.
|
2018-12-12 21:09:10 +00:00
|
|
|
func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
|
2018-04-27 17:37:02 +00:00
|
|
|
o.mapsMu.Lock()
|
|
|
|
defer o.mapsMu.Unlock()
|
2018-12-12 21:09:10 +00:00
|
|
|
if err := o.inodeLocked().Mappable().CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
|
2018-04-27 17:37:02 +00:00
|
|
|
return err
|
|
|
|
}
|
2018-12-12 21:09:10 +00:00
|
|
|
o.mappings.AddMapping(ms, dstAR, offset, writable)
|
2018-04-27 17:37:02 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Translate implements memmap.Mappable.Translate.
|
|
|
|
func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
|
|
|
|
o.dataMu.RLock()
|
|
|
|
defer o.dataMu.RUnlock()
|
|
|
|
return o.inodeLocked().Mappable().Translate(ctx, required, optional, at)
|
|
|
|
}
|
|
|
|
|
|
|
|
// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
|
|
|
|
func (o *overlayEntry) InvalidateUnsavable(ctx context.Context) error {
|
|
|
|
o.mapsMu.Lock()
|
|
|
|
defer o.mapsMu.Unlock()
|
|
|
|
return o.inodeLocked().Mappable().InvalidateUnsavable(ctx)
|
|
|
|
}
|