Fix runsc tests on VFS2 overlay.

- Check the sticky bit in overlay.filesystem.UnlinkAt(). Fixes
  StickyTest.StickyBitPermDenied.

- When configuring a VFS2 overlay in runsc, copy the lower layer's root
  owner/group/mode to the upper layer's root (as in the VFS1 equivalent,
  boot.addOverlay()). This makes the overlay root owned by UID/GID 65534 with
  mode 0755 rather than owned by UID/GID 0 with mode 01777. Fixes
  CreateTest.CreateFailsOnUnpermittedDir, which assumes that the test cannot
  create files in /.

- MknodTest.UnimplementedTypesReturnError assumes that the creation of device
  special files is not supported. However, while the VFS2 gofer client still
  doesn't support device special files, VFS2 tmpfs does, and in the overlay
  test dimension mknod() targets a tmpfs upper layer. The test initially has
  all capabilities, including CAP_MKNOD, so its creation of these files
  succeeds. Constrain these tests to VFS1.

- Rename overlay.nonDirectoryFD to overlay.regularFileFD and only use it for
  regular files, using the original FD for pipes and device special files. This
  is more consistent with Linux (which gets the original inode_operations, and
  therefore file_operations, for these file types from ovl_fill_inode() =>
  init_special_inode()) and fixes remaining mknod and pipe tests.

- Read/write 1KB at a time in PipeTest.Streaming, rather than 4 bytes. This
  isn't strictly necessary, but it makes the test less obnoxiously slow on
  ptrace.

Fixes #4407

PiperOrigin-RevId: 337971042
This commit is contained in:
Jamie Liu 2020-10-19 17:46:05 -07:00 committed by gVisor bot
parent 8f29b8d252
commit cd86bd4931
10 changed files with 247 additions and 100 deletions

View File

@ -21,14 +21,16 @@ go_library(
"directory.go",
"filesystem.go",
"fstree.go",
"non_directory.go",
"overlay.go",
"regular_file.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
"//pkg/log",
"//pkg/sentry/arch",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
@ -37,5 +39,6 @@ go_library(
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
)

View File

@ -75,8 +75,21 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
return syserror.ENOENT
}
// Perform copy-up.
// Obtain settable timestamps from the lower layer.
vfsObj := d.fs.vfsfs.VirtualFilesystem()
oldpop := vfs.PathOperation{
Root: d.lowerVDs[0],
Start: d.lowerVDs[0],
}
const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
Mask: timestampsMask,
})
if err != nil {
return err
}
// Perform copy-up.
newpop := vfs.PathOperation{
Root: d.parent.upperVD,
Start: d.parent.upperVD,
@ -101,10 +114,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
switch ftype {
case linux.S_IFREG:
oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
Root: d.lowerVDs[0],
Start: d.lowerVDs[0],
}, &vfs.OpenOptions{
oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
Flags: linux.O_RDONLY,
})
if err != nil {
@ -160,9 +170,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := newFD.SetStat(ctx, vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_UID | linux.STATX_GID,
UID: d.uid,
GID: d.gid,
Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
UID: d.uid,
GID: d.gid,
Atime: oldStat.Atime,
Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@ -179,9 +191,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_UID | linux.STATX_GID,
UID: d.uid,
GID: d.gid,
Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
UID: d.uid,
GID: d.gid,
Atime: oldStat.Atime,
Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@ -195,10 +209,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
d.upperVD = upperVD
case linux.S_IFLNK:
target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
Root: d.lowerVDs[0],
Start: d.lowerVDs[0],
})
target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
if err != nil {
return err
}
@ -207,10 +218,12 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID,
Mode: uint16(d.mode),
UID: d.uid,
GID: d.gid,
Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
Mode: uint16(d.mode),
UID: d.uid,
GID: d.gid,
Atime: oldStat.Atime,
Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@ -224,25 +237,20 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
d.upperVD = upperVD
case linux.S_IFBLK, linux.S_IFCHR:
lowerStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
Root: d.lowerVDs[0],
Start: d.lowerVDs[0],
}, &vfs.StatOptions{})
if err != nil {
return err
}
if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
Mode: linux.FileMode(d.mode),
DevMajor: lowerStat.RdevMajor,
DevMinor: lowerStat.RdevMinor,
DevMajor: oldStat.RdevMajor,
DevMinor: oldStat.RdevMinor,
}); err != nil {
return err
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: linux.STATX_UID | linux.STATX_GID,
UID: d.uid,
GID: d.gid,
Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
UID: d.uid,
GID: d.gid,
Atime: oldStat.Atime,
Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()

View File

@ -765,7 +765,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if mustCreate {
return nil, syserror.EEXIST
}
if mayWrite {
if start.isRegularFile() && mayWrite {
if err := start.copyUpLocked(ctx); err != nil {
return nil, err
}
@ -819,7 +819,7 @@ afterTrailingSymlink:
if rp.MustBeDir() && !child.isDir() {
return nil, syserror.ENOTDIR
}
if mayWrite {
if child.isRegularFile() && mayWrite {
if err := child.copyUpLocked(ctx); err != nil {
return nil, err
}
@ -872,8 +872,11 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
if err != nil {
return nil, err
}
if ftype != linux.S_IFREG {
return layerFD, nil
}
layerFlags := layerFD.StatusFlags()
fd := &nonDirectoryFD{
fd := &regularFileFD{
copiedUp: isUpper,
cachedFD: layerFD,
cachedFlags: layerFlags,
@ -969,7 +972,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
// Finally construct the overlay FD.
upperFlags := upperFD.StatusFlags()
fd := &nonDirectoryFD{
fd := &regularFileFD{
copiedUp: true,
cachedFD: upperFD,
cachedFlags: upperFlags,
@ -1293,6 +1296,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
if !child.isDir() {
return syserror.ENOTDIR
}
if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(atomic.LoadUint32(&parent.mode)), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
return err
}
child.dirMu.Lock()
defer child.dirMu.Unlock()
whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx)
@ -1528,12 +1534,38 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
parentMode := atomic.LoadUint32(&parent.mode)
child := parent.children[name]
var childLayer lookupLayer
if child == nil {
if parentMode&linux.S_ISVTX != 0 {
// If the parent's sticky bit is set, we need a child dentry to get
// its owner.
child, err = fs.getChildLocked(ctx, parent, name, &ds)
if err != nil {
return err
}
} else {
// Determine if the file being unlinked actually exists. Holding
// parent.dirMu prevents a dentry from being instantiated for the file,
// which in turn prevents it from being copied-up, so this result is
// stable.
childLayer, err = fs.lookupLayerLocked(ctx, parent, name)
if err != nil {
return err
}
if !childLayer.existsInOverlay() {
return syserror.ENOENT
}
}
}
if child != nil {
if child.isDir() {
return syserror.EISDIR
}
if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(parentMode), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
return err
}
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
return err
}
@ -1546,18 +1578,6 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
} else {
childLayer = lookupLayerLower
}
} else {
// Determine if the file being unlinked actually exists. Holding
// parent.dirMu prevents a dentry from being instantiated for the file,
// which in turn prevents it from being copied-up, so this result is
// stable.
childLayer, err = fs.lookupLayerLocked(ctx, parent, name)
if err != nil {
return err
}
if !childLayer.existsInOverlay() {
return syserror.ENOENT
}
}
pop := vfs.PathOperation{

View File

@ -18,7 +18,7 @@
//
// Lock order:
//
// directoryFD.mu / nonDirectoryFD.mu
// directoryFD.mu / regularFileFD.mu
// filesystem.renameMu
// dentry.dirMu
// dentry.copyMu
@ -453,7 +453,7 @@ type dentry struct {
// - If this dentry is copied-up, then wrappedMappable is the Mappable
// obtained from a call to the current top layer's
// FileDescription.ConfigureMMap(). Once wrappedMappable becomes non-nil
// (from a call to nonDirectoryFD.ensureMappable()), it cannot become nil.
// (from a call to regularFileFD.ensureMappable()), it cannot become nil.
// wrappedMappable is protected by mapsMu and dataMu.
//
// - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is

View File

@ -19,14 +19,21 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
// isRegularFile returns true if d represents a regular file.
//
// d.mode is loaded atomically because it may be updated concurrently
// (note the atomic.LoadUint32 usage for d.mode elsewhere in this package,
// e.g. in SetStat and UnlinkAt).
func (d *dentry) isRegularFile() bool {
	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFREG
}
// isSymlink returns true if d represents a symbolic link.
//
// d.mode is loaded atomically for consistency with other concurrent
// readers of d.mode in this package.
func (d *dentry) isSymlink() bool {
	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
}
@ -40,7 +47,7 @@ func (d *dentry) readlink(ctx context.Context) (string, error) {
}
// +stateify savable
type nonDirectoryFD struct {
type regularFileFD struct {
fileDescription
// If copiedUp is false, cachedFD represents
@ -52,9 +59,13 @@ type nonDirectoryFD struct {
copiedUp bool
cachedFD *vfs.FileDescription
cachedFlags uint32
// If copiedUp is false, lowerWaiters contains all waiter.Entries
// registered with cachedFD. lowerWaiters is protected by mu.
lowerWaiters map[*waiter.Entry]waiter.EventMask
}
func (fd *nonDirectoryFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
fd.mu.Lock()
defer fd.mu.Unlock()
wrappedFD, err := fd.currentFDLocked(ctx)
@ -65,7 +76,7 @@ func (fd *nonDirectoryFD) getCurrentFD(ctx context.Context) (*vfs.FileDescriptio
return wrappedFD, nil
}
func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
d := fd.dentry()
statusFlags := fd.vfsfd.StatusFlags()
if !fd.copiedUp && d.isCopiedUp() {
@ -87,10 +98,21 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip
return nil, err
}
}
if len(fd.lowerWaiters) != 0 {
ready := upperFD.Readiness(^waiter.EventMask(0))
for e, mask := range fd.lowerWaiters {
fd.cachedFD.EventUnregister(e)
upperFD.EventRegister(e, mask)
if ready&mask != 0 {
e.Callback.Callback(e)
}
}
}
fd.cachedFD.DecRef(ctx)
fd.copiedUp = true
fd.cachedFD = upperFD
fd.cachedFlags = statusFlags
fd.lowerWaiters = nil
} else if fd.cachedFlags != statusFlags {
if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil {
return nil, err
@ -101,13 +123,13 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip
}
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *nonDirectoryFD) Release(ctx context.Context) {
func (fd *regularFileFD) Release(ctx context.Context) {
fd.cachedFD.DecRef(ctx)
fd.cachedFD = nil
}
// OnClose implements vfs.FileDescriptionImpl.OnClose.
func (fd *nonDirectoryFD) OnClose(ctx context.Context) error {
func (fd *regularFileFD) OnClose(ctx context.Context) error {
// Linux doesn't define ovl_file_operations.flush at all (i.e. its
// equivalent to OnClose is a no-op). We pass through to
// fd.cachedFD.OnClose() without upgrading if fd.dentry() has been
@ -128,7 +150,7 @@ func (fd *nonDirectoryFD) OnClose(ctx context.Context) error {
}
// Stat implements vfs.FileDescriptionImpl.Stat.
func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
var stat linux.Statx
if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
wrappedFD, err := fd.getCurrentFD(ctx)
@ -149,7 +171,7 @@ func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux
}
// Allocate implements vfs.FileDescriptionImpl.Allocate.
func (fd *nonDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return err
@ -159,7 +181,7 @@ func (fd *nonDirectoryFD) Allocate(ctx context.Context, mode, offset, length uin
}
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
d := fd.dentry()
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
@ -191,12 +213,61 @@ func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions)
}
// StatFS implements vfs.FileDescriptionImpl.StatFS.
func (fd *nonDirectoryFD) StatFS(ctx context.Context) (linux.Statfs, error) {
func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) {
return fd.filesystem().statFS(ctx)
}
// Readiness implements waiter.Waitable.Readiness.
//
// Readiness delegates to the FD for the current (possibly copied-up) layer.
// Since waiter.Waitable.Readiness cannot return an error, a failure to
// obtain the current FD falls back to fd.cachedFD, which may be stale with
// respect to a concurrent copy-up.
func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
	ctx := context.Background()
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		// TODO(b/171089913): Just use fd.cachedFD since Readiness can't return
		// an error. This is obviously wrong, but at least consistent with
		// VFS1.
		log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err)
		fd.mu.Lock()
		wrappedFD = fd.cachedFD
		// Take a reference so the deferred DecRef below is balanced on both
		// the success and fallback paths (getCurrentFD returns a referenced
		// FD on success).
		wrappedFD.IncRef()
		fd.mu.Unlock()
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.Readiness(mask)
}
// EventRegister implements waiter.Waitable.EventRegister.
//
// The entry is registered with the FD for the current layer. If this FD has
// not yet been copied up, the entry is also recorded in fd.lowerWaiters so
// that a later copy-up (see currentFDLocked) can migrate it from the lower
// layer's FD to the upper layer's FD.
func (fd *regularFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(context.Background())
	if err != nil {
		// TODO(b/171089913): Just use fd.cachedFD since EventRegister can't
		// return an error. This is obviously wrong, but at least consistent
		// with VFS1.
		log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err)
		wrappedFD = fd.cachedFD
	}
	wrappedFD.EventRegister(e, mask)
	if !fd.copiedUp {
		// Remember the registration so it can be transferred to the upper
		// layer's FD on copy-up. lowerWaiters is lazily allocated.
		if fd.lowerWaiters == nil {
			fd.lowerWaiters = make(map[*waiter.Entry]waiter.EventMask)
		}
		fd.lowerWaiters[e] = mask
	}
}
// EventUnregister implements waiter.Waitable.EventUnregister.
//
// The entry is unregistered from the cached FD and, if no copy-up has
// occurred yet, removed from fd.lowerWaiters so it is not re-registered
// with the upper layer's FD on a future copy-up.
func (fd *regularFileFD) EventUnregister(e *waiter.Entry) {
	fd.mu.Lock()
	defer fd.mu.Unlock()
	fd.cachedFD.EventUnregister(e)
	if !fd.copiedUp {
		// delete on a nil map is a no-op, so this is safe even if
		// lowerWaiters was never allocated.
		delete(fd.lowerWaiters, e)
	}
}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return 0, err
@ -206,7 +277,7 @@ func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, off
}
// Read implements vfs.FileDescriptionImpl.Read.
func (fd *nonDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// Hold fd.mu during the read to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@ -218,7 +289,7 @@ func (fd *nonDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return 0, err
@ -228,7 +299,7 @@ func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, of
}
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *nonDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// Hold fd.mu during the write to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@ -240,7 +311,7 @@ func (fd *nonDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opt
}
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *nonDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
// Hold fd.mu during the seek to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@ -252,7 +323,7 @@ func (fd *nonDirectoryFD) Seek(ctx context.Context, offset int64, whence int32)
}
// Sync implements vfs.FileDescriptionImpl.Sync.
func (fd *nonDirectoryFD) Sync(ctx context.Context) error {
func (fd *regularFileFD) Sync(ctx context.Context) error {
fd.mu.Lock()
if !fd.dentry().isCopiedUp() {
fd.mu.Unlock()
@ -269,8 +340,18 @@ func (fd *nonDirectoryFD) Sync(ctx context.Context) error {
return wrappedFD.Sync(ctx)
}
// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
//
// The ioctl is passed through to the FD for the current (possibly
// copied-up) layer; the reference returned by getCurrentFD is dropped
// before returning.
func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return 0, err
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.Ioctl(ctx, uio, args)
}
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
if err := fd.ensureMappable(ctx, opts); err != nil {
return err
}
@ -278,7 +359,7 @@ func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOp
}
// ensureMappable ensures that fd.dentry().wrappedMappable is not nil.
func (fd *nonDirectoryFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
d := fd.dentry()
// Fast path if we already have a Mappable for the current top layer.

View File

@ -237,8 +237,7 @@ func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
// PipeSize implements fcntl(F_GETPIPE_SZ).
func (fd *VFSPipeFD) PipeSize() int64 {
// Inline Pipe.FifoSize() rather than calling it with nil Context and
// fs.File and ignoring the returned error (which is always nil).
// Inline Pipe.FifoSize() since we don't have a fs.File.
fd.pipe.mu.Lock()
defer fd.pipe.mu.Unlock()
return fd.pipe.max

View File

@ -145,16 +145,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return uintptr(file.StatusFlags()), nil, nil
case linux.F_SETFL:
return 0, nil, file.SetStatusFlags(t, t.Credentials(), args[2].Uint())
case linux.F_SETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {
return 0, nil, syserror.EBADF
}
n, err := pipefile.SetPipeSize(int64(args[2].Int()))
if err != nil {
return 0, nil, err
}
return uintptr(n), nil, nil
case linux.F_GETOWN:
owner, hasOwner := getAsyncOwner(t, file)
if !hasOwner {
@ -190,6 +180,16 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
}
return 0, nil, setAsyncOwner(t, file, owner.Type, owner.PID)
case linux.F_SETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {
return 0, nil, syserror.EBADF
}
n, err := pipefile.SetPipeSize(int64(args[2].Int()))
if err != nil {
return 0, nil, err
}
return uintptr(n), nil, nil
case linux.F_GETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {

View File

@ -264,10 +264,38 @@ func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Cre
}
cu.Add(func() { lower.DecRef(ctx) })
// Propagate the lower layer's root's owner, group, and mode to the upper
// layer's root for consistency with VFS1.
upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root())
lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root())
stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{
Root: lowerRootVD,
Start: lowerRootVD,
}, &vfs.StatOptions{
Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE,
})
if err != nil {
return nil, nil, err
}
err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{
Root: upperRootVD,
Start: upperRootVD,
}, &vfs.SetStatOptions{
Stat: linux.Statx{
Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask,
UID: stat.UID,
GID: stat.GID,
Mode: stat.Mode,
},
})
if err != nil {
return nil, nil, err
}
// Configure overlay with both layers.
overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{
UpperRoot: vfs.MakeVirtualDentry(upper, upper.Root()),
LowerRoots: []vfs.VirtualDentry{vfs.MakeVirtualDentry(lower, lower.Root())},
UpperRoot: upperRootVD,
LowerRoots: []vfs.VirtualDentry{lowerRootVD},
}
return &overlayOpts, cu.Release(), nil
}

View File

@ -93,15 +93,15 @@ TEST(MknodTest, MknodOnExistingPathFails) {
}
TEST(MknodTest, UnimplementedTypesReturnError) {
const std::string path = NewTempAbsPath();
// TODO(gvisor.dev/issue/1624): These file types are supported by some
// filesystems in VFS2, so this test should be deleted along with VFS1.
SKIP_IF(!IsRunningWithVFS1());
if (IsRunningWithVFS1()) {
ASSERT_THAT(mknod(path.c_str(), S_IFSOCK, 0),
SyscallFailsWithErrno(EOPNOTSUPP));
}
// These will fail on linux as well since we don't have CAP_MKNOD.
ASSERT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM));
ASSERT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
const std::string path = NewTempAbsPath();
EXPECT_THAT(mknod(path.c_str(), S_IFSOCK, 0),
SyscallFailsWithErrno(EOPNOTSUPP));
EXPECT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM));
EXPECT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
}
TEST(MknodTest, Socket) {

View File

@ -569,30 +569,38 @@ TEST_P(PipeTest, Streaming) {
// Size() requires 2 syscalls, call it once and remember the value.
const int pipe_size = Size();
const size_t streamed_bytes = 4 * pipe_size;
absl::Notification notify;
ScopedThread t([this, &notify, pipe_size]() {
ScopedThread t([&, this]() {
std::vector<char> buf(1024);
// Don't start until it's full.
notify.WaitForNotification();
for (int i = 0; i < pipe_size; i++) {
int rbuf;
ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
EXPECT_EQ(rbuf, i);
ssize_t total = 0;
while (total < streamed_bytes) {
ASSERT_THAT(read(rfd_.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
total += buf.size();
}
});
// Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader
// to start. Then we write pipe size worth 3 more times to ensure the reader
// can follow along.
//
// The size of each write (which is determined by buf.size()) must be smaller
// than the size of the pipe (which, in the "smallbuffer" configuration, is 1
// page) for the check for notify.Notify() below to be correct.
std::vector<char> buf(1024);
RandomizeBuffer(buf.data(), buf.size());
ssize_t total = 0;
for (int i = 0; i < pipe_size; i++) {
ssize_t written = write(wfd_.get(), &i, sizeof(i));
ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i)));
total += written;
while (total < streamed_bytes) {
ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
total += buf.size();
// Is the next write about to fill up the buffer? Wake up the reader once.
if (total < pipe_size && (total + written) >= pipe_size) {
if (total < pipe_size && (total + buf.size()) >= pipe_size) {
notify.Notify();
}
}