gvisor/pkg/sentry/fsimpl/proc/tasks_files.go

385 lines
12 KiB
Go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"bytes"
"fmt"
"strconv"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
type selfSymlink struct {
kernfs.InodeAttrs
kernfs.InodeNoopRefCount
kernfs.InodeSymlink
pidns *kernel.PIDNamespace
}
var _ kernfs.Inode = (*selfSymlink)(nil)
func (fs *filesystem) newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
inode := &selfSymlink{pidns: pidns}
inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
return d
}
func (s *selfSymlink) Readlink(ctx context.Context) (string, error) {
t := kernel.TaskFromContext(ctx)
if t == nil {
// Who is reading this link?
return "", syserror.EINVAL
}
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
if tgid == 0 {
return "", syserror.ENOENT
}
return strconv.FormatUint(uint64(tgid), 10), nil
}
func (s *selfSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
target, err := s.Readlink(ctx)
return vfs.VirtualDentry{}, target, err
}
// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
type threadSelfSymlink struct {
kernfs.InodeAttrs
kernfs.InodeNoopRefCount
kernfs.InodeSymlink
pidns *kernel.PIDNamespace
}
var _ kernfs.Inode = (*threadSelfSymlink)(nil)
func (fs *filesystem) newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
inode := &threadSelfSymlink{pidns: pidns}
inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
return d
}
func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
t := kernel.TaskFromContext(ctx)
if t == nil {
// Who is reading this link?
return "", syserror.EINVAL
}
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
tid := s.pidns.IDOfTask(t)
if tid == 0 || tgid == 0 {
return "", syserror.ENOENT
}
return fmt.Sprintf("%d/task/%d", tgid, tid), nil
}
func (s *threadSelfSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
target, err := s.Readlink(ctx)
return vfs.VirtualDentry{}, target, err
}
// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
// dynamicBytesFileSetAttr implements a special file that allows inode
// attributes to be set. This is to support /proc files that are readonly, but
// allow attributes to be set.
type dynamicBytesFileSetAttr struct {
kernfs.DynamicBytesFile
}
// SetStat implements Inode.SetStat.
func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts)
}
// cpuStats contains the breakdown of CPU time for /proc/stat.
type cpuStats struct {
// user is time spent in userspace tasks with non-positive niceness.
user uint64
// nice is time spent in userspace tasks with positive niceness.
nice uint64
// system is time spent in non-interrupt kernel context.
system uint64
// idle is time spent idle.
idle uint64
// ioWait is time spent waiting for IO.
ioWait uint64
// irq is time spent in interrupt context.
irq uint64
// softirq is time spent in software interrupt context.
softirq uint64
// steal is involuntary wait time.
steal uint64
// guest is time spent in guests with non-positive niceness.
guest uint64
// guestNice is time spent in guests with positive niceness.
guestNice uint64
}
// String implements fmt.Stringer.
func (c cpuStats) String() string {
return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
}
// statData implements vfs.DynamicBytesSource for /proc/stat.
//
// +stateify savable
type statData struct {
dynamicBytesFileSetAttr
}
var _ dynamicInode = (*statData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// TODO(b/37226836): We currently export only zero CPU stats. We could
// at least provide some aggregate stats.
var cpu cpuStats
fmt.Fprintf(buf, "cpu %s\n", cpu)
k := kernel.KernelFromContext(ctx)
for c, max := uint(0), k.ApplicationCores(); c < max; c++ {
fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
}
// The total number of interrupts is dependent on the CPUs and PCI
// devices on the system. See arch_probe_nr_irqs.
//
// Since we don't report real interrupt stats, just choose an arbitrary
// value from a representative VM.
const numInterrupts = 256
// The Kernel doesn't handle real interrupts, so report all zeroes.
// TODO(b/37226836): We could count page faults as #PF.
fmt.Fprintf(buf, "intr 0") // total
for i := 0; i < numInterrupts; i++ {
fmt.Fprintf(buf, " 0")
}
fmt.Fprintf(buf, "\n")
// Total number of context switches.
// TODO(b/37226836): Count this.
fmt.Fprintf(buf, "ctxt 0\n")
// CLOCK_REALTIME timestamp from boot, in seconds.
fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds())
// Total number of clones.
// TODO(b/37226836): Count this.
fmt.Fprintf(buf, "processes 0\n")
// Number of runnable tasks.
// TODO(b/37226836): Count this.
fmt.Fprintf(buf, "procs_running 0\n")
// Number of tasks waiting on IO.
// TODO(b/37226836): Count this.
fmt.Fprintf(buf, "procs_blocked 0\n")
// Number of each softirq handled.
fmt.Fprintf(buf, "softirq 0") // total
for i := 0; i < linux.NumSoftIRQ; i++ {
fmt.Fprintf(buf, " 0")
}
fmt.Fprintf(buf, "\n")
return nil
}
// loadavgData backs /proc/loadavg.
//
// +stateify savable
type loadavgData struct {
dynamicBytesFileSetAttr
}
var _ dynamicInode = (*loadavgData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// TODO(b/62345059): Include real data in fields.
// Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
// Column 4-5: currently running processes and the total number of processes.
// Column 6: the last process ID used.
fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
return nil
}
// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
//
// +stateify savable
type meminfoData struct {
dynamicBytesFileSetAttr
}
var _ dynamicInode = (*meminfoData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
k := kernel.KernelFromContext(ctx)
mf := k.MemoryFile()
mf.UpdateUsage()
snapshot, totalUsage := usage.MemoryAccounting.Copy()
totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
anon := snapshot.Anonymous + snapshot.Tmpfs
file := snapshot.PageCache + snapshot.Mapped
// We don't actually have active/inactive LRUs, so just make up numbers.
activeFile := (file / 2) &^ (usermem.PageSize - 1)
inactiveFile := file - activeFile
fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
memFree := totalSize - totalUsage
if memFree > totalSize {
// Underflow.
memFree = 0
}
// We use MemFree as MemAvailable because we don't swap.
// TODO(rahat): When reclaim is implemented the value of MemAvailable
// should change.
fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree/1024)
fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree/1024)
fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
// Emulate a system with no swap, which disables inactivation of anon pages.
fmt.Fprintf(buf, "SwapCache: 0 kB\n")
fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
fmt.Fprintf(buf, "SwapFree: 0 kB\n")
fmt.Fprintf(buf, "Dirty: 0 kB\n")
fmt.Fprintf(buf, "Writeback: 0 kB\n")
fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
return nil
}
// uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
//
// +stateify savable
type uptimeData struct {
dynamicBytesFileSetAttr
}
var _ dynamicInode = (*uptimeData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
k := kernel.KernelFromContext(ctx)
now := time.NowFromContext(ctx)
// Pretend that we've spent zero time sleeping (second number).
fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds())
return nil
}
// versionData implements vfs.DynamicBytesSource for /proc/version.
//
// +stateify savable
type versionData struct {
dynamicBytesFileSetAttr
}
var _ dynamicInode = (*versionData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
k := kernel.KernelFromContext(ctx)
init := k.GlobalInit()
if init == nil {
// Attempted to read before the init Task is created. This can
// only occur during startup, which should never need to read
// this file.
panic("Attempted to read version before initial Task is available")
}
// /proc/version takes the form:
//
// "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
// (COMPILER_VERSION) VERSION"
//
// where:
// - SYSNAME, RELEASE, and VERSION are the same as returned by
// sys_utsname
// - COMPILE_USER is the user that build the kernel
// - COMPILE_HOST is the hostname of the machine on which the kernel
// was built
// - COMPILER_VERSION is the version reported by the building compiler
//
// Since we don't really want to expose build information to
// applications, those fields are omitted.
//
// FIXME(mpratt): Using Version from the init task SyscallTable
// disregards the different version a task may have (e.g., in a uts
// namespace).
ver := init.Leader().SyscallTable().Version
fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
return nil
}
// filesystemsData backs /proc/filesystems.
//
// +stateify savable
type filesystemsData struct {
kernfs.DynamicBytesFile
}
var _ dynamicInode = (*filesystemsData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
k := kernel.KernelFromContext(ctx)
k.VFS().GenerateProcFilesystems(buf)
return nil
}