2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package kernel
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"sort"
|
|
|
|
"sync"
|
|
|
|
"sync/atomic"
|
|
|
|
"syscall"
|
|
|
|
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
|
|
"gvisor.dev/gvisor/pkg/refs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/kdefs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/limits"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// FDs is an ordering of FD's that can be made stable.
|
|
|
|
type FDs []kdefs.FD
|
|
|
|
|
|
|
|
func (f FDs) Len() int {
|
|
|
|
return len(f)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f FDs) Swap(i, j int) {
|
|
|
|
f[i], f[j] = f[j], f[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f FDs) Less(i, j int) bool {
|
|
|
|
return f[i] < f[j]
|
|
|
|
}
|
|
|
|
|
|
|
|
// FDFlags define flags for an individual descriptor.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type FDFlags struct {
|
|
|
|
// CloseOnExec indicates the descriptor should be closed on exec.
|
|
|
|
CloseOnExec bool
|
|
|
|
}
|
|
|
|
|
2018-08-09 23:49:23 +00:00
|
|
|
// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
|
|
|
|
// representation.
|
2018-07-31 18:18:02 +00:00
|
|
|
func (f FDFlags) ToLinuxFileFlags() (mask uint) {
|
|
|
|
if f.CloseOnExec {
|
|
|
|
mask |= linux.O_CLOEXEC
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-08-09 23:49:23 +00:00
|
|
|
// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
|
|
|
|
// representation.
|
2018-07-31 18:18:02 +00:00
|
|
|
func (f FDFlags) ToLinuxFDFlags() (mask uint) {
|
|
|
|
if f.CloseOnExec {
|
|
|
|
mask |= linux.FD_CLOEXEC
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// descriptor holds the details about a file descriptor, namely a pointer the
|
|
|
|
// file itself and the descriptor flags.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type descriptor struct {
|
|
|
|
file *fs.File
|
|
|
|
flags FDFlags
|
|
|
|
}
|
|
|
|
|
|
|
|
// FDMap is used to manage File references and flags.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type FDMap struct {
|
|
|
|
refs.AtomicRefCount
|
|
|
|
k *Kernel
|
|
|
|
files map[kdefs.FD]descriptor
|
|
|
|
mu sync.RWMutex `state:"nosave"`
|
|
|
|
uid uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
// ID returns a unique identifier for this FDMap.
|
|
|
|
func (f *FDMap) ID() uint64 {
|
|
|
|
return f.uid
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFDMap allocates a new FDMap that may be used by tasks in k.
|
|
|
|
func (k *Kernel) NewFDMap() *FDMap {
|
|
|
|
return &FDMap{
|
|
|
|
k: k,
|
|
|
|
files: make(map[kdefs.FD]descriptor),
|
|
|
|
uid: atomic.AddUint64(&k.fdMapUids, 1),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// destroy removes all of the file descriptors from the map.
|
|
|
|
func (f *FDMap) destroy() {
|
|
|
|
f.RemoveIf(func(*fs.File, FDFlags) bool {
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// DecRef implements RefCounter.DecRef with destructor f.destroy.
|
|
|
|
func (f *FDMap) DecRef() {
|
|
|
|
f.DecRefWithDestructor(f.destroy)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Size returns the number of file descriptor slots currently allocated.
|
|
|
|
func (f *FDMap) Size() int {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
|
|
|
|
return len(f.files)
|
|
|
|
}
|
|
|
|
|
|
|
|
// String is a stringer for FDMap.
|
|
|
|
func (f *FDMap) String() string {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
|
|
|
|
var b bytes.Buffer
|
|
|
|
for k, v := range f.files {
|
|
|
|
n, _ := v.file.Dirent.FullName(nil /* root */)
|
|
|
|
b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", k, n))
|
|
|
|
}
|
|
|
|
return b.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFDFrom allocates a new FD guaranteed to be the lowest number available
|
|
|
|
// greater than or equal to from. This property is important as Unix programs
|
|
|
|
// tend to count on this allocation order.
|
|
|
|
func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) {
|
|
|
|
if fd < 0 {
|
|
|
|
// Don't accept negative FDs.
|
|
|
|
return 0, syscall.EINVAL
|
|
|
|
}
|
|
|
|
|
|
|
|
f.mu.Lock()
|
|
|
|
defer f.mu.Unlock()
|
|
|
|
|
|
|
|
// Finds the lowest fd not in the handles map.
|
|
|
|
lim := limitSet.Get(limits.NumberOfFiles)
|
|
|
|
for i := fd; lim.Cur == limits.Infinity || i < kdefs.FD(lim.Cur); i++ {
|
|
|
|
if _, ok := f.files[i]; !ok {
|
|
|
|
file.IncRef()
|
|
|
|
f.files[i] = descriptor{file, flags}
|
|
|
|
return i, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1, syscall.EMFILE
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFDAt sets the file reference for the given FD. If there is an
|
|
|
|
// active reference for that FD, the ref count for that existing reference
|
|
|
|
// is decremented.
|
|
|
|
func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error {
|
|
|
|
if fd < 0 {
|
|
|
|
// Don't accept negative FDs.
|
|
|
|
return syscall.EBADF
|
|
|
|
}
|
|
|
|
|
|
|
|
// In this one case we do not do a defer of the Unlock. The
|
|
|
|
// reason is that we must have done all the work needed for
|
|
|
|
// discarding any old open file before we return to the
|
|
|
|
// caller. In other words, the DecRef(), below, must have
|
|
|
|
// completed by the time we return to the caller to ensure
|
|
|
|
// side effects are, in fact, effected. A classic example is
|
|
|
|
// dup2(fd1, fd2); if fd2 was already open, it must be closed,
|
|
|
|
// and we don't want to resume the caller until it is; we have
|
|
|
|
// to block on the DecRef(). Hence we can not just do a 'go
|
|
|
|
// oldfile.DecRef()', since there would be no guarantee that
|
|
|
|
// it would be done before we the caller resumed. Since we
|
|
|
|
// must wait for the DecRef() to finish, and that could take
|
|
|
|
// time, it's best to first call f.muUnlock beore so we are
|
|
|
|
// not blocking other uses of this FDMap on the DecRef() call.
|
|
|
|
f.mu.Lock()
|
|
|
|
oldDesc, oldExists := f.files[fd]
|
|
|
|
lim := limitSet.Get(limits.NumberOfFiles).Cur
|
|
|
|
// if we're closing one then the effective limit is one
|
|
|
|
// more than the actual limit.
|
|
|
|
if oldExists && lim != limits.Infinity {
|
|
|
|
lim++
|
|
|
|
}
|
|
|
|
if lim != limits.Infinity && fd >= kdefs.FD(lim) {
|
|
|
|
f.mu.Unlock()
|
|
|
|
return syscall.EMFILE
|
|
|
|
}
|
|
|
|
|
|
|
|
file.IncRef()
|
|
|
|
f.files[fd] = descriptor{file, flags}
|
|
|
|
f.mu.Unlock()
|
|
|
|
|
|
|
|
if oldExists {
|
|
|
|
oldDesc.file.DecRef()
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetFlags sets the flags for the given file descriptor, if it is valid.
|
|
|
|
func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) {
|
|
|
|
f.mu.Lock()
|
|
|
|
defer f.mu.Unlock()
|
|
|
|
|
|
|
|
desc, ok := f.files[fd]
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
f.files[fd] = descriptor{desc.file, flags}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetDescriptor returns a reference to the file and the flags for the FD. It
|
|
|
|
// bumps its reference count as well. It returns nil if there is no File
|
|
|
|
// for the FD, i.e. if the FD is invalid. The caller must use DecRef
|
|
|
|
// when they are done.
|
|
|
|
func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
|
|
|
|
if desc, ok := f.files[fd]; ok {
|
|
|
|
desc.file.IncRef()
|
|
|
|
return desc.file, desc.flags
|
|
|
|
}
|
|
|
|
return nil, FDFlags{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetFile returns a reference to the File for the FD and bumps
|
|
|
|
// its reference count as well. It returns nil if there is no File
|
|
|
|
// for the FD, i.e. if the FD is invalid. The caller must use DecRef
|
|
|
|
// when they are done.
|
|
|
|
func (f *FDMap) GetFile(fd kdefs.FD) *fs.File {
|
|
|
|
f.mu.RLock()
|
|
|
|
if desc, ok := f.files[fd]; ok {
|
|
|
|
desc.file.IncRef()
|
|
|
|
f.mu.RUnlock()
|
|
|
|
return desc.file
|
|
|
|
}
|
|
|
|
f.mu.RUnlock()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// fds returns an ordering of FDs.
|
|
|
|
func (f *FDMap) fds() FDs {
|
|
|
|
fds := make(FDs, 0, len(f.files))
|
|
|
|
for fd := range f.files {
|
|
|
|
fds = append(fds, fd)
|
|
|
|
}
|
|
|
|
sort.Sort(fds)
|
|
|
|
return fds
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetFDs returns a list of valid fds.
|
|
|
|
func (f *FDMap) GetFDs() FDs {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
return f.fds()
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetRefs returns a stable slice of references to all files and bumps the
|
|
|
|
// reference count on each. The caller must use DecRef on each reference when
|
|
|
|
// they're done using the slice.
|
|
|
|
func (f *FDMap) GetRefs() []*fs.File {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
|
|
|
|
fds := f.fds()
|
|
|
|
fs := make([]*fs.File, 0, len(fds))
|
|
|
|
for _, fd := range fds {
|
|
|
|
desc := f.files[fd]
|
|
|
|
desc.file.IncRef()
|
|
|
|
fs = append(fs, desc.file)
|
|
|
|
}
|
|
|
|
return fs
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fork returns an independent FDMap pointing to the same descriptors.
|
|
|
|
func (f *FDMap) Fork() *FDMap {
|
|
|
|
f.mu.RLock()
|
|
|
|
defer f.mu.RUnlock()
|
|
|
|
|
|
|
|
clone := f.k.NewFDMap()
|
|
|
|
|
|
|
|
// Grab a extra reference for every file.
|
|
|
|
for fd, desc := range f.files {
|
|
|
|
desc.file.IncRef()
|
|
|
|
clone.files[fd] = desc
|
|
|
|
}
|
|
|
|
|
|
|
|
// That's it!
|
|
|
|
return clone
|
|
|
|
}
|
|
|
|
|
|
|
|
// unlock releases all file locks held by this FDMap's uid. Must only be
|
|
|
|
// called on a non-nil *fs.File.
|
|
|
|
func (f *FDMap) unlock(file *fs.File) {
|
|
|
|
id := lock.UniqueID(f.ID())
|
|
|
|
file.Dirent.Inode.LockCtx.Posix.UnlockRegion(id, lock.LockRange{0, lock.LockEOF})
|
|
|
|
}
|
|
|
|
|
|
|
|
// inotifyFileClose generates the appropriate inotify events for f being closed.
|
|
|
|
func inotifyFileClose(f *fs.File) {
|
|
|
|
var ev uint32
|
|
|
|
d := f.Dirent
|
|
|
|
|
|
|
|
if fs.IsDir(d.Inode.StableAttr) {
|
|
|
|
ev |= linux.IN_ISDIR
|
|
|
|
}
|
|
|
|
|
|
|
|
if f.Flags().Write {
|
|
|
|
ev |= linux.IN_CLOSE_WRITE
|
|
|
|
} else {
|
|
|
|
ev |= linux.IN_CLOSE_NOWRITE
|
|
|
|
}
|
|
|
|
|
|
|
|
d.InotifyEvent(ev, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove removes an FD from the FDMap, and returns (File, true) if a File
|
|
|
|
// one was found. Callers are expected to decrement the reference count on
|
|
|
|
// the File. Otherwise returns (nil, false).
|
|
|
|
func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) {
|
|
|
|
f.mu.Lock()
|
|
|
|
desc := f.files[fd]
|
|
|
|
delete(f.files, fd)
|
|
|
|
f.mu.Unlock()
|
|
|
|
if desc.file != nil {
|
|
|
|
f.unlock(desc.file)
|
|
|
|
inotifyFileClose(desc.file)
|
|
|
|
return desc.file, true
|
|
|
|
}
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveIf removes all FDs where cond is true.
|
|
|
|
func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) {
|
|
|
|
var removed []*fs.File
|
|
|
|
f.mu.Lock()
|
|
|
|
for fd, desc := range f.files {
|
|
|
|
if desc.file != nil && cond(desc.file, desc.flags) {
|
|
|
|
delete(f.files, fd)
|
|
|
|
removed = append(removed, desc.file)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
f.mu.Unlock()
|
|
|
|
|
|
|
|
for _, file := range removed {
|
|
|
|
f.unlock(file)
|
|
|
|
inotifyFileClose(file)
|
|
|
|
file.DecRef()
|
|
|
|
}
|
|
|
|
}
|