Fix /proc/self/mounts and /proc/self/mountinfo in VFS2.

Some extra fields were added to the Mount type to expose necessary data to the
proc filesystem.

PiperOrigin-RevId: 304053361
This commit is contained in:
Nicolas Lacasse 2020-03-31 15:00:26 -07:00 committed by gVisor bot
parent 9de982ea79
commit e1c8eaca8f
2 changed files with 220 additions and 157 deletions

View File

@ -18,13 +18,10 @@ import (
"bytes"
"fmt"
"io"
"sort"
"strings"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@ -634,51 +631,6 @@ func (s *exeSymlink) executable() (file fsbridge.File, err error) {
return
}
// forEachMount calls fn for the mount containing the task's root directory
// and for the mounts that AllMountsUnder reports beneath it, visiting them in
// ascending mount ID order. fn receives the mount's path relative to the
// task's root directory together with the mount itself.
//
// Mounts whose root dentry is no longer available, or whose root is not a
// descendant of the task's root directory, are skipped. Nothing is visited if
// the task no longer has an FSContext or a root directory.
func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
	var fsContext *kernel.FSContext
	t.WithMuLocked(func(locked *kernel.Task) {
		fsContext = locked.FSContext()
	})
	if fsContext == nil {
		// The task is already gone, so there is nothing to report.
		return
	}

	// Every reported path is relative to the task's root directory; mounts
	// that live outside of it are left out below.
	taskRoot := fsContext.RootDirectory()
	if taskRoot == nil {
		// The task is already gone, so there is nothing to report.
		return
	}
	defer taskRoot.DecRef()

	rootMount := t.MountNamespace().FindMount(taskRoot)
	if rootMount == nil {
		// Possibly unmounted in the meantime.
		return
	}

	mounts := t.MountNamespace().AllMountsUnder(rootMount)
	sort.Slice(mounts, func(a, b int) bool {
		return mounts[a].ID < mounts[b].ID
	})
	for idx := range mounts {
		mount := mounts[idx]
		mountRoot := mount.Root()
		if mountRoot == nil {
			// The mount is no longer valid.
			continue
		}
		mountPath, reachable := mountRoot.FullName(taskRoot)
		mountRoot.DecRef()
		if reachable {
			// Only mounts inside the task's root are reported.
			fn(mountPath, mount)
		}
	}
}
// mountInfoData is used to implement /proc/[pid]/mountinfo.
//
// +stateify savable
@ -692,94 +644,24 @@ var _ dynamicInode = (*mountInfoData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
//
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped. The forEachMount callback below looks like the
// removed implementation and the FSContext/RootDirectoryVFS2 section after it
// looks like the added one; as shown, the callback is never closed. Confirm
// against the real file before relying on the control flow seen here.
func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Presumably the removed body: writes one mountinfo line into buf for every
// mount that forEachMount visits.
forEachMount(i.task, func(mountPath string, m *fs.Mount) {
mroot := m.Root()
if mroot == nil {
return // No longer valid.
}
defer mroot.DecRef()
// Format:
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
// (1) MountSource ID.
fmt.Fprintf(buf, "%d ", m.ID)
// (2) Parent ID (or this ID if there is no parent).
pID := m.ID
if !m.IsRoot() && !m.IsUndo() {
pID = m.ParentID
}
fmt.Fprintf(buf, "%d ", pID)
// (3) Major:Minor device ID. We don't have a superblock, so we
// just use the root inode device number.
sa := mroot.Inode.StableAttr
fmt.Fprintf(buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor)
// (4) Root: the pathname of the directory in the filesystem
// which forms the root of this mount.
//
// NOTE(b/78135857): This will always be "/" until we implement
// bind mounts.
fmt.Fprintf(buf, "/ ")
// (5) Mount point (relative to process root).
fmt.Fprintf(buf, "%s ", mountPath)
// (6) Mount options. "rw"/"ro" always comes first, followed by the
// optional noatime and noexec flags in that order.
flags := mroot.Inode.MountSource.Flags
opts := "rw"
if flags.ReadOnly {
opts = "ro"
}
if flags.NoAtime {
opts += ",noatime"
}
if flags.NoExec {
opts += ",noexec"
}
fmt.Fprintf(buf, "%s ", opts)
// (7) Optional fields: zero or more fields of the form "tag[:value]".
// (8) Separator: the end of the optional fields is marked by a single hyphen.
fmt.Fprintf(buf, "- ")
// (9) Filesystem type.
fmt.Fprintf(buf, "%s ", mroot.Inode.MountSource.FilesystemType)
// (10) Mount source: filesystem-specific information or "none".
fmt.Fprintf(buf, "none ")
// (11) Superblock options, and final newline.
fmt.Fprintf(buf, "%s\n", superBlockOpts(mountPath, mroot.Inode.MountSource))
// Presumably the added body starts here: read the task's FSContext through
// WithMuLocked, take its VFS2 root directory, and let the VFS layer write the
// mountinfo contents into buf.
var fsctx *kernel.FSContext
i.task.WithMuLocked(func(t *kernel.Task) {
fsctx = t.FSContext()
})
if fsctx == nil {
// The task has been destroyed. Nothing to show here.
return nil
}
rootDir := fsctx.RootDirectoryVFS2()
if !rootDir.Ok() {
// Root has been destroyed. Don't try to read mounts.
return nil
}
// Release the root directory once generation has finished.
defer rootDir.DecRef()
i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf)
return nil
}
// superBlockOpts builds the super block options string for the mount source
// msrc mounted at mountPath. The result is "ro" or "rw", taken from the
// mount's ReadOnly flag, with ",<last path component>" appended for mounts
// whose filesystem type is "cgroup".
func superBlockOpts(mountPath string, msrc *fs.MountSource) string {
	// gVisor has no real notion of super block options yet, so the
	// read-only bit of the mount flags stands in for them.
	var opts string
	if msrc.Flags.ReadOnly {
		opts = "ro"
	} else {
		opts = "rw"
	}
	if msrc.FilesystemType != "cgroup" {
		return opts
	}

	// NOTE(b/147673608): cgroup mounts must also list the cgroup name in
	// their options. For now that name is taken from the final component of
	// the mount path.
	// TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, read
	// this value from the cgroup itself rather than from the path.
	cgroupType := mountPath[strings.LastIndex(mountPath, "/")+1:]
	return opts + "," + cgroupType
}
// mountsData is used to implement /proc/[pid]/mounts.
//
// +stateify savable
@ -789,33 +671,24 @@ type mountsData struct {
task *kernel.Task
}
var _ dynamicInode = (*mountInfoData)(nil)
var _ dynamicInode = (*mountsData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
//
// NOTE(review): this span appears to come from a diff rendering whose +/-
// markers were stripped. The forEachMount callback below looks like the
// removed implementation and the FSContext/RootDirectoryVFS2 section after it
// looks like the added one; as shown, the callback is never closed. Confirm
// against the real file before relying on the control flow seen here.
func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Presumably the removed body: writes one /proc/[pid]/mounts line into buf
// for every mount that forEachMount visits.
forEachMount(i.task, func(mountPath string, m *fs.Mount) {
// Format:
// <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
//
// We use the filesystem name as the first field, since there
// is no real block device we can point to, and we also should
// not expose anything about the remote filesystem.
//
// Only ro/rw option is supported for now.
//
// The "needs dump" and fsck flags are always 0, which is allowed.
root := m.Root()
if root == nil {
return // No longer valid.
}
defer root.DecRef()
flags := root.Inode.MountSource.Flags
opts := "rw"
if flags.ReadOnly {
opts = "ro"
}
fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", mountPath, root.Inode.MountSource.FilesystemType, opts, 0, 0)
// Presumably the added body starts here: read the task's FSContext through
// WithMuLocked, take its VFS2 root directory, and let the VFS layer write the
// mounts contents into buf.
var fsctx *kernel.FSContext
i.task.WithMuLocked(func(t *kernel.Task) {
fsctx = t.FSContext()
})
if fsctx == nil {
// The task has been destroyed. Nothing to show here.
return nil
}
rootDir := fsctx.RootDirectoryVFS2()
if !rootDir.Ok() {
// Root has been destroyed. Don't try to read mounts.
return nil
}
// Release the root directory once generation has finished.
defer rootDir.DecRef()
i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf)
return nil
}

View File

@ -15,7 +15,11 @@
package vfs
import (
"bytes"
"fmt"
"math"
"sort"
"strings"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
@ -44,7 +48,7 @@ var lastMountID uint64
//
// +stateify savable
type Mount struct {
// vfs, fs, and root are immutable. References are held on fs and root.
// vfs, fs, root are immutable. References are held on fs and root.
//
// Invariant: root belongs to fs.
vfs *VirtualFilesystem
@ -639,12 +643,28 @@ func (mnt *Mount) setReadOnlyLocked(ro bool) error {
return nil
}
// readOnly reports whether mnt's writers count, loaded atomically, is
// negative.
//
// NOTE(review): presumably a negative count is how setReadOnlyLocked marks the
// mount read-only; confirm against that function.
func (mnt *Mount) readOnly() bool {
	writers := atomic.LoadInt64(&mnt.writers)
	return writers < 0
}
// Filesystem returns the Filesystem that mnt mounts. No reference is taken on
// the returned Filesystem, so the caller does not own one.
func (mnt *Mount) Filesystem() *Filesystem {
	mounted := mnt.fs
	return mounted
}
// submountsLocked collects mnt together with every Mount that descends from
// it. mnt is always the first element; each child's subtree follows as one
// contiguous run, in the order that ranging over mnt.children yields the
// children.
//
// Precondition: mnt.vfs.mountMu must be held.
func (mnt *Mount) submountsLocked() []*Mount {
	all := []*Mount{mnt}
	for child := range mnt.children {
		subtree := child.submountsLocked()
		all = append(all, subtree...)
	}
	return all
}
// Root returns mntns' root. A reference is taken on the returned
// VirtualDentry.
func (mntns *MountNamespace) Root() VirtualDentry {
@ -655,3 +675,173 @@ func (mntns *MountNamespace) Root() VirtualDentry {
vd.IncRef()
return vd
}
// GenerateProcMounts emits the contents of /proc/[pid]/mounts for vfs to buf.
//
// One line is written for taskRootDir's mount and for each of its submounts,
// in ascending mount ID order. A mount whose pathname cannot be determined, or
// whose root is not reachable from taskRootDir, is omitted; the remaining
// mounts are still listed.
//
// Preconditions: taskRootDir.Ok().
func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
	vfs.mountMu.Lock()
	defer vfs.mountMu.Unlock()

	rootMnt := taskRootDir.mount
	mounts := rootMnt.submountsLocked()
	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
	for _, mnt := range mounts {
		// Get the path to this mount relative to task root.
		mntRootVD := VirtualDentry{
			mount:  mnt,
			dentry: mnt.root,
		}
		path, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
		if err != nil {
			// For some reason we didn't get a path. Log a warning and
			// skip only this mount, so that the mounts sorted after it
			// are still reported.
			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
			continue
		}
		if path == "" {
			// The mount is not reachable from the task root; skip it
			// without cutting the listing short.
			continue
		}

		opts := "rw"
		if mnt.readOnly() {
			opts = "ro"
		}
		if mnt.flags.NoExec {
			opts += ",noexec"
		}

		// Format:
		// <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
		//
		// The mount point is escaped with manglePath, as it is in
		// /proc/[pid]/mountinfo, so that whitespace or backslashes in the
		// path cannot be mistaken for field separators.
		//
		// The "needs dump" and "fsck order" flags are always 0, which
		// is allowed.
		fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", manglePath(path), mnt.fs.FilesystemType().Name(), opts, 0, 0)
	}
}
// GenerateProcMountInfo emits the contents of /proc/[pid]/mountinfo for vfs to
// buf.
//
// One line is written for taskRootDir's mount and for each of its submounts,
// in ascending mount ID order. A mount whose pathname cannot be determined,
// whose root is not reachable from taskRootDir, or whose root cannot be
// stat'ed is omitted; the remaining mounts are still listed.
//
// Preconditions: taskRootDir.Ok().
func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
	vfs.mountMu.Lock()
	defer vfs.mountMu.Unlock()

	rootMnt := taskRootDir.mount
	mounts := rootMnt.submountsLocked()
	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
	for _, mnt := range mounts {
		// Get the path to this mount relative to task root.
		mntRootVD := VirtualDentry{
			mount:  mnt,
			dentry: mnt.root,
		}
		path, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
		if err != nil {
			// For some reason we didn't get a path. Log a warning and
			// skip only this mount, so that the mounts sorted after it
			// are still reported.
			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
			continue
		}
		if path == "" {
			// The mount is not reachable from the task root; skip it
			// without cutting the listing short.
			continue
		}

		// Stat the mount root to get the major/minor device numbers.
		pop := &PathOperation{
			Root:  mntRootVD,
			Start: mntRootVD,
		}
		statx, err := vfs.StatAt(ctx, auth.NewAnonymousCredentials(), pop, &StatOptions{})
		if err != nil {
			// Well that's not good. Ignore this mount, but keep going
			// with the others.
			ctx.Warningf("Error stat'ing mount root %+v: %v", mnt.root, err)
			continue
		}

		// Format:
		// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
		// (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)

		// (1) Mount ID.
		fmt.Fprintf(buf, "%d ", mnt.ID)

		// (2) Parent ID (or this ID if there is no parent).
		pID := mnt.ID
		if p := mnt.parent(); p != nil {
			pID = p.ID
		}
		fmt.Fprintf(buf, "%d ", pID)

		// (3) Major:Minor device ID. We don't have a superblock, so we
		// just use the root inode device number.
		fmt.Fprintf(buf, "%d:%d ", statx.DevMajor, statx.DevMinor)

		// (4) Root: the pathname of the directory in the filesystem
		// which forms the root of this mount.
		//
		// NOTE(b/78135857): This will always be "/" until we implement
		// bind mounts.
		fmt.Fprintf(buf, "/ ")

		// (5) Mount point (relative to process root), escaped so that it
		// stays a single space-delimited field.
		fmt.Fprintf(buf, "%s ", manglePath(path))

		// (6) Mount options.
		opts := "rw"
		if mnt.readOnly() {
			opts = "ro"
		}
		if mnt.flags.NoExec {
			opts += ",noexec"
		}
		// TODO(gvisor.dev/issue/1193): Add "noatime" if MS_NOATIME is
		// set.
		fmt.Fprintf(buf, "%s ", opts)

		// (7) Optional fields: zero or more fields of the form "tag[:value]".
		// (8) Separator: the end of the optional fields is marked by a single hyphen.
		fmt.Fprintf(buf, "- ")

		// (9) Filesystem type.
		fmt.Fprintf(buf, "%s ", mnt.fs.FilesystemType().Name())

		// (10) Mount source: filesystem-specific information or "none".
		fmt.Fprintf(buf, "none ")

		// (11) Superblock options, and final newline. The unescaped path
		// is passed because superBlockOpts derives the cgroup name from
		// its last component.
		fmt.Fprintf(buf, "%s\n", superBlockOpts(path, mnt))
	}
}
// procPathMangler performs the escaping used by manglePath. It is built once
// at package scope rather than on every call; a *strings.Replacer is safe for
// concurrent use by multiple goroutines.
var procPathMangler = strings.NewReplacer(" ", "\\040", "\t", "\\011", "\n", "\\012", "\\", "\\134")

// manglePath replaces ' ', '\t', '\n', and '\\' with their octal equivalents
// ("\040", "\011", "\012" and "\134" respectively) and returns the result.
// All other bytes are copied through unchanged, and replacement text is never
// rescanned, so the backslashes introduced by an escape are not escaped again.
// See Linux fs/seq_file.c:mangle_path.
func manglePath(p string) string {
	return procPathMangler.Replace(p)
}
// superBlockOpts returns the super block options string for the mount at the
// given path. The result is "ro" or "rw", depending on mnt.readOnly(), with
// ",<last path component>" appended for mounts whose filesystem type is named
// "cgroup".
func superBlockOpts(mountPath string, mnt *Mount) string {
	// gVisor has no real notion of super block options yet, so the
	// read-only state of the mount stands in for them.
	var opts string
	if mnt.readOnly() {
		opts = "ro"
	} else {
		opts = "rw"
	}
	if mnt.fs.FilesystemType().Name() != "cgroup" {
		return opts
	}

	// NOTE(b/147673608): cgroup mounts must also list the cgroup name in
	// their options. For now that name is taken from the final component of
	// the mount path.
	// TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, read
	// this value from the cgroup itself rather than from the path.
	cgroupType := mountPath[strings.LastIndex(mountPath, "/")+1:]
	return opts + "," + cgroupType
}