gvisor/pkg/sentry/fsimpl/fuse/regular_file.go

231 lines
6.3 KiB
Go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fuse
import (
"io"
"math"
"sync"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
// regularFileFD is the file description type whose methods in this file
// implement PRead, Read, PWrite and Write. It embeds fileDescription and adds
// the offset used by the non-positional Read and Write calls.
type regularFileFD struct {
fileDescription
// off is the file offset. Read advances it by the number of bytes read and
// Write replaces it with the final offset reported by pwrite.
off int64
// offMu protects off. Read and Write hold it for the whole operation, so
// offset-based calls on the same description are serialized.
offMu sync.Mutex
}
// PRead implements vfs.FileDescriptionImpl.PRead.
//
// It reads up to dst.NumBytes() bytes starting at offset and copies them into
// dst, returning the number of bytes copied.
//
// Errors:
//   - EINVAL if offset is negative or the request exceeds math.MaxUint32 bytes.
//   - EOPNOTSUPP if any flag other than RWF_HIPRI is set.
//   - io.EOF if offset is at or past the end of the file after the cached size
//     has been revised.
//   - EIO if fewer bytes than expected could be copied into dst.
//   - Any error returned by reviseAttr, ReadInPages or CopyOut.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	if offset < 0 {
		return 0, syserror.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, syserror.EOPNOTSUPP
	}

	size := dst.NumBytes()
	if size == 0 {
		// Early return if count is 0.
		return 0, nil
	}
	if size > math.MaxUint32 {
		// FUSE only supports uint32 for size.
		// Overflow.
		return 0, syserror.EINVAL
	}

	// TODO(gvisor.dev/issue/3678): Add direct IO support.

	inode := fd.inode()

	// offset is non-negative and size fits in 32 bits, so this sum cannot
	// wrap around in uint64 (unlike offset+size in int64).
	end := uint64(offset) + uint64(size)

	// Take a single snapshot of the file size and use it for both the EOF
	// check and the truncation below. Loading inode.size separately for each
	// step would let a concurrent size change slip in between them, which
	// could make the truncated size non-positive and wrap when converted to
	// uint32.
	fileSize := atomic.LoadUint64(&inode.size)

	// Reading beyond EOF, update file size if outdated.
	if end > fileSize {
		if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
			return 0, err
		}
		fileSize = atomic.LoadUint64(&inode.size)
		// If the offset after update is still too large, return error.
		if uint64(offset) >= fileSize {
			return 0, io.EOF
		}
	}

	// Truncate the read with updated file size. Here offset < fileSize is
	// guaranteed, so the resulting size stays positive.
	if end > fileSize {
		size = int64(fileSize) - offset
	}

	buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
	if err != nil {
		return 0, err
	}

	// TODO(gvisor.dev/issue/3237): support indirect IO (e.g. caching),
	// store the bytes that were read ahead.

	// Update the number of bytes to copy for short read.
	if n < uint32(size) {
		size = int64(n)
	}

	// Copy the bytes read to the dst.
	// This loop is intended for fragmented reads.
	// For the majority of reads, this loop only executes once.
	var copied int64
	for _, buffer := range buffers {
		toCopy := int64(len(buffer))
		if copied+toCopy > size {
			// Never hand back more than the (possibly shortened) size.
			toCopy = size - copied
		}
		cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
		if err != nil {
			return 0, err
		}
		if int64(cp) != toCopy {
			return 0, syserror.EIO
		}
		copied += toCopy
	}

	return copied, nil
}
// Read implements vfs.FileDescriptionImpl.Read.
//
// It performs a PRead at the description's current offset and then advances
// that offset by the number of bytes PRead reports. offMu is held for the
// whole operation.
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	fd.offMu.Lock()
	defer fd.offMu.Unlock()

	nread, err := fd.PRead(ctx, dst, fd.off, opts)
	fd.off += nread
	return nread, err
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
//
// It delegates to pwrite and discards the final offset, since a positional
// write does not update the description's own offset.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	nwritten, _, err := fd.pwrite(ctx, src, offset, opts)
	return nwritten, err
}
// Write implements vfs.FileDescriptionImpl.Write.
//
// It writes at the description's current offset via pwrite and then stores
// the final offset that pwrite reports, whether or not pwrite returned an
// error. offMu is held for the whole operation.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	fd.offMu.Lock()
	defer fd.offMu.Unlock()

	nwritten, newOff, err := fd.pwrite(ctx, src, fd.off, opts)
	fd.off = newOff
	return nwritten, err
}
// pwrite is the shared implementation behind PWrite and Write.
//
// It returns the number of bytes written, the final offset and an error.
// PWrite ignores the final offset; Write stores it as the new file offset.
// On every failure path written is 0 and finalOff is the offset in effect at
// that point, which is the file size if O_APPEND has already been applied.
//
// inode.metadataMu is held from just after flag validation until return.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	if offset < 0 {
		return 0, offset, syserror.EINVAL
	}

	// Only RWF_HIPRI is accepted; any other flag is rejected.
	//
	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
	if unsupported := opts.Flags &^ linux.RWF_HIPRI; unsupported != 0 {
		return 0, offset, syserror.EOPNOTSUPP
	}

	inode := fd.inode()
	inode.metadataMu.Lock()
	defer inode.metadataMu.Unlock()

	// With O_APPEND the write always starts at the current file size.
	// Note: since our Open() implements the interface of kernfs,
	// and kernfs currently does not support O_APPEND, this will never
	// be true before we switch out from kernfs.
	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
		// Holding inode.metadataMu is sufficient for reading size.
		offset = int64(inode.size)
	}

	srclen := src.NumBytes()
	if srclen > math.MaxUint32 {
		// FUSE carries the size as a uint32, so larger requests cannot be
		// represented.
		return 0, offset, syserror.EINVAL
	}
	if offset+srclen < offset {
		// The end offset overflowed int64.
		return 0, offset, syserror.EINVAL
	}

	if srclen, err = vfs.CheckLimit(ctx, offset, srclen); err != nil {
		return 0, offset, err
	}
	if srclen == 0 {
		// Return before causing any side effects.
		return 0, offset, nil
	}
	src = src.TakeFirst64(srclen)

	// TODO(gvisor.dev/issue/3237): Add cache support:
	// buffer cache. Ideally we write from src to our buffer cache first.
	// The slice passed to fs.Write() should be a slice from buffer cache.
	//
	// The payload is copied out of user memory up front because
	// connection.Call() blocks on kerneltask, which in turn acquires the
	// mm.activeMu lock. Functions like CopyInTo() will attempt to acquire
	// mm.activeMu as well, which would deadlock. Reading from userspace
	// memory must therefore finish before t.Block() deactivates it.
	buf := make([]byte, srclen)
	nIn, err := src.CopyIn(ctx, buf)
	switch {
	case err != nil:
		return 0, offset, err
	case int64(nIn) != srclen:
		return 0, offset, syserror.EIO
	}

	nOut, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), buf)
	switch {
	case err != nil:
		return 0, offset, err
	case nOut == 0:
		// srclen != 0 was checked above, so a successful write of zero
		// bytes is a short write and is reported as EIO.
		return 0, offset, syserror.EIO
	}

	written = int64(nOut)
	finalOff = offset + written

	// Grow the cached size if the write extended the file, and bump the
	// connection's attribute version to reflect the change.
	if int64(inode.size) < finalOff {
		atomic.StoreUint64(&inode.size, uint64(finalOff))
		atomic.AddUint64(&inode.fs.conn.attributeVersion, 1)
	}

	return written, finalOff, nil
}