gvisor/pkg/sentry/fs/gofer/file.go

256 lines
8.7 KiB
Go

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gofer
import (
"syscall"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/metric"
"gvisor.googlesource.com/gvisor/pkg/p9"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/device"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/waiter"
)
// openedWX is the metric that NewFile increments each time it hands out a
// file that is opened for writing and is also executable.
var openedWX = metric.MustCreateNewUint64Metric(
	"/gofer/opened_write_execute_file",
	true /* sync */,
	"Number of times a writable+executable file was opened from a gofer.",
)
// fileOperations implements fs.FileOperations for a remote file system.
//
// The `state:"..."` struct tags are left exactly as written; they are
// presumably consumed by the sentry's save/restore tooling (confirm against
// the state package before changing them).
type fileOperations struct {
// NOTE(review): presumably supplies the Ioctl method required by
// fs.FileOperations; confirm against fsutil.
fsutil.NoIoctl `state:"nosave"`
// NOTE(review): presumably supplies the waiter methods and reports the file
// as always ready; confirm against the waiter package.
waiter.AlwaysReady `state:"nosave"`
// inodeOperations is the inodeOperations backing the file. It is protected
// by a reference held by File.Dirent.Inode which is stable until
// FileOperations.Release is called.
inodeOperations *inodeOperations `state:"wait"`
// dirCursor is the directory cursor. Readdir passes its address to the
// fs.DirCtx it builds, and Seek passes its address to
// fsutil.SeekWithDirCursor.
dirCursor string
// handles are the opened remote file system handles, which may
// be shared with other files. Release calls DecRef on them.
handles *handles `state:"nosave"`
// flags are the flags used to open handles. NewFile stores them with
// Pread and Pwrite forced to true.
flags fs.FileFlags `state:"wait"`
}
// Compile-time assertion that *fileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*fileOperations)(nil)
// NewFile returns a file. NewFile is not appropriate with host pipes and sockets.
//
// The returned fs.File is backed by a fileOperations that wraps i and handles.
// flags is received by value; the copy has Pread and Pwrite forced to true
// before it is stored and passed on to fs.NewFile.
//
// If the file is opened for writing and the caller also has execute
// permission on the inode, the openedWX metric is incremented and a warning
// naming the file is logged.
func NewFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, i *inodeOperations, handles *handles) *fs.File {
	// Remote file systems enforce readability/writability at an offset,
	// see fs/9p/vfs_inode.c:v9fs_vfs_atomic_open -> fs/open.c:finish_open.
	flags.Pread = true
	flags.Pwrite = true

	f := &fileOperations{
		inodeOperations: i,
		handles:         handles,
		flags:           flags,
	}

	if flags.Write {
		if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Execute: true}); err == nil {
			// RootFromContext hands back a referenced Dirent (Readdir in
			// this file releases it with DecRef), so the reference must be
			// dropped here as well or it leaks on every such open.
			root := fs.RootFromContext(ctx)
			// The name is only used for the log line below, so the
			// second result of FullName is deliberately ignored.
			name, _ := dirent.FullName(root)
			if root != nil {
				root.DecRef()
			}
			openedWX.Increment()
			log.Warningf("Opened a writable executable: %q", name)
		}
	}

	return fs.NewFile(ctx, dirent, flags, f)
}
// Release implements fs.FileOperations.Release.
//
// It calls DecRef on the remote handles held by this file.
func (f *fileOperations) Release() {
	h := f.handles
	h.DecRef()
}
// Readdir implements fs.FileOperations.Readdir.
//
// It serializes directory entries through serializer by delegating to
// fs.DirentReaddir, with f acting as the directory iterator and f.dirCursor
// as the cursor. It returns whatever fs.DirentReaddir returns.
//
// When the session's cache policy is anything other than cacheNone, the
// inode's access time is touched after the read, regardless of whether the
// read reported an error.
func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
	root := fs.RootFromContext(ctx)
	// NOTE(review): RootFromContext is understood to return nil when ctx is
	// not associated with a filesystem root; guard the release so that case
	// does not dereference a nil Dirent.
	if root != nil {
		defer root.DecRef()
	}

	dirCtx := &fs.DirCtx{
		Serializer: serializer,
		DirCursor:  &f.dirCursor,
	}
	n, err := fs.DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
	if f.inodeOperations.session().cachePolicy != cacheNone {
		f.inodeOperations.cachingInodeOps.TouchAccessTime(ctx, file.Dirent.Inode)
	}
	return n, err
}
// IterateDir implements fs.DirIterator.IterateDir.
//
// The whole call runs with the inode's readdirMu held. The directory listing
// is re-fetched from the remote side when no listing is cached yet or when
// the session's cache policy is cacheNone; otherwise the cached listing is
// reused. On a fetch error the incoming offset is returned unchanged along
// with the error.
func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
	iops := f.inodeOperations

	iops.readdirMu.Lock()
	defer iops.readdirMu.Unlock()

	// Refresh the listing when there is none, or when caching is disabled.
	if iops.readdirCache == nil || iops.session().cachePolicy == cacheNone {
		fetched, fetchErr := f.readdirAll(ctx)
		if fetchErr != nil {
			return offset, fetchErr
		}
		// Remember the result for subsequent calls.
		iops.readdirCache = fs.NewSortedDentryMap(fetched)
	}

	// Hand the (possibly refreshed) listing to the generic serializer.
	written, serializeErr := fs.GenericReaddir(dirCtx, iops.readdirCache)
	return offset + written, serializeErr
}
// readdirAll fetches the fs.DentAttr of every entry in the directory backing
// f, keyed by entry name. The "." and ".." entries are left out.
//
// It returns a nil map and the error if any remote readdir call fails.
func (f *fileOperations) readdirAll(ctx context.Context) (map[string]fs.DentAttr, error) {
	result := make(map[string]fs.DentAttr)

	var next uint64
	for {
		// Ask for an arbitrarily large batch (64k entries) each time and keep
		// going until the remote side has nothing more to return.
		batch, err := f.handles.File.readdir(ctx, next, 64*1024)
		if err != nil {
			return nil, err
		}
		if len(batch) == 0 {
			// An empty batch means EOF.
			return result, nil
		}

		// The final entry of a batch carries the offset of the next batch.
		// The gofer reports this as an index into the directory's entries
		// rather than a byte offset; that is fine as long as it is used
		// consistently, which it is here.
		next = batch[len(batch)-1].Offset

		for idx := range batch {
			entry := batch[idx]

			// "." and ".." must never appear in Readdir results.
			if entry.Name == "." || entry.Name == ".." {
				continue
			}

			// Map the remote type onto the closest local inode type;
			// anything that is not a directory or symlink is treated as a
			// regular file.
			var kind fs.InodeType
			switch entry.Type {
			case p9.TypeSymlink:
				kind = fs.Symlink
			case p9.TypeDir:
				kind = fs.Directory
			default:
				kind = fs.RegularFile
			}

			// Build the key used to look up the virtual inode. Entries live
			// on the same Device and SecondaryDevice as their parent
			// directory.
			key := device.MultiDeviceKey{
				Device:          f.inodeOperations.fileState.key.Device,
				SecondaryDevice: f.inodeOperations.fileState.key.SecondaryDevice,
				Inode:           entry.QID.Path,
			}
			result[entry.Name] = fs.DentAttr{
				Type:    kind,
				InodeID: goferDevice.Map(key),
			}
		}
	}
}
// Write implements fs.FileOperations.Write.
//
// Writing to a directory fails with EISDIR. Regular files go through the
// caching inode operations unless the session's cache policy is cacheNone;
// everything else is written directly through the handles at offset.
func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
	// Some remote file systems do not reject directory writes themselves, so
	// the client enforces it.
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		return 0, syserror.EISDIR
	}

	// Only regular files use cached IO; some character devices expect no
	// caching at all.
	regular := fs.IsFile(file.Dirent.Inode.StableAttr)
	if f.inodeOperations.session().cachePolicy != cacheNone && regular {
		return f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
	}

	direct := f.handles.readWriterAt(ctx, offset)
	return src.CopyInTo(ctx, direct)
}
// Read implements fs.FileOperations.Read.
//
// Reading from a directory fails with EISDIR. Regular files go through the
// caching inode operations unless the session's cache policy is cacheNone;
// everything else is read directly through the handles at offset.
func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	// Some remote file systems do not reject directory reads themselves, so
	// the client enforces it.
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		return 0, syserror.EISDIR
	}

	// Only regular files use cached IO; some character devices expect no
	// caching at all.
	regular := fs.IsFile(file.Dirent.Inode.StableAttr)
	if f.inodeOperations.session().cachePolicy != cacheNone && regular {
		return f.inodeOperations.cachingInodeOps.Read(ctx, file, dst, offset)
	}

	direct := f.handles.readWriterAt(ctx, offset)
	return dst.CopyOutFrom(ctx, direct)
}
// Fsync implements fs.FileOperations.Fsync.
//
// For fs.SyncAll and fs.SyncData the inode is written out first, and a
// failure there is returned immediately. For those two types and for
// fs.SyncBackingStorage the remote side is then synced: via the host file
// descriptor when the handles carry one, otherwise via the p9 file handle.
// Any other sync type panics.
func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start int64, end int64, syncType fs.SyncType) error {
	switch syncType {
	case fs.SyncAll, fs.SyncData:
		if err := file.Dirent.Inode.WriteOut(ctx); err != nil {
			return err
		}
	case fs.SyncBackingStorage:
		// Nothing to write out locally; only the remote sync below applies.
	default:
		panic("invalid sync type")
	}

	// Sync remote caches, preferring the host fd when there is one.
	if host := f.handles.Host; host != nil {
		return syscall.Fsync(host.FD())
	}
	// No host fd: sync on the p9.File handle instead.
	return f.handles.File.fsync(ctx)
}
// Flush implements fs.FileOperations.Flush.
//
// The flush is forwarded to the remote file handle only when the file was
// opened writable; otherwise it is a no-op that returns nil.
func (f *fileOperations) Flush(ctx context.Context, file *fs.File) error {
	// A file that is not writable has nothing to flush. The check exists
	// because some p9 server implementations of Flush are over-zealous.
	//
	// FIXME: weaken these implementations and remove this check.
	if file.Flags().Write {
		return f.handles.File.flush(ctx)
	}
	return nil
}
// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
//
// It returns ENODEV when isFileCachable reports false for this session and
// inode; otherwise it defers to fsutil.GenericConfigureMMap using the caching
// inode operations.
func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
	sess := f.inodeOperations.session()
	if isFileCachable(sess, file.Dirent.Inode) {
		return fsutil.GenericConfigureMMap(file, f.inodeOperations.cachingInodeOps, opts)
	}
	return syserror.ENODEV
}
// Seek implements fs.FileOperations.Seek.
//
// It delegates to fsutil.SeekWithDirCursor, passing the address of this
// file's directory cursor.
func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
	cursor := &f.dirCursor
	return fsutil.SeekWithDirCursor(ctx, file, whence, offset, cursor)
}