231 lines
6.3 KiB
Go
231 lines
6.3 KiB
Go
// Copyright 2020 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package fuse
|
|
|
|
import (
|
|
"io"
|
|
"math"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
"gvisor.dev/gvisor/pkg/context"
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
"gvisor.dev/gvisor/pkg/usermem"
|
|
)
|
|
|
|
type regularFileFD struct {
|
|
fileDescription
|
|
|
|
// off is the file offset.
|
|
off int64
|
|
// offMu protects off.
|
|
offMu sync.Mutex
|
|
}
|
|
|
|
// PRead implements vfs.FileDescriptionImpl.PRead.
|
|
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
|
|
if offset < 0 {
|
|
return 0, syserror.EINVAL
|
|
}
|
|
|
|
// Check that flags are supported.
|
|
//
|
|
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
|
|
if opts.Flags&^linux.RWF_HIPRI != 0 {
|
|
return 0, syserror.EOPNOTSUPP
|
|
}
|
|
|
|
size := dst.NumBytes()
|
|
if size == 0 {
|
|
// Early return if count is 0.
|
|
return 0, nil
|
|
} else if size > math.MaxUint32 {
|
|
// FUSE only supports uint32 for size.
|
|
// Overflow.
|
|
return 0, syserror.EINVAL
|
|
}
|
|
|
|
// TODO(gvisor.dev/issue/3678): Add direct IO support.
|
|
|
|
inode := fd.inode()
|
|
|
|
// Reading beyond EOF, update file size if outdated.
|
|
if uint64(offset+size) > atomic.LoadUint64(&inode.size) {
|
|
if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
|
|
return 0, err
|
|
}
|
|
// If the offset after update is still too large, return error.
|
|
if uint64(offset) >= atomic.LoadUint64(&inode.size) {
|
|
return 0, io.EOF
|
|
}
|
|
}
|
|
|
|
// Truncate the read with updated file size.
|
|
fileSize := atomic.LoadUint64(&inode.size)
|
|
if uint64(offset+size) > fileSize {
|
|
size = int64(fileSize) - offset
|
|
}
|
|
|
|
buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
// TODO(gvisor.dev/issue/3237): support indirect IO (e.g. caching),
|
|
// store the bytes that were read ahead.
|
|
|
|
// Update the number of bytes to copy for short read.
|
|
if n < uint32(size) {
|
|
size = int64(n)
|
|
}
|
|
|
|
// Copy the bytes read to the dst.
|
|
// This loop is intended for fragmented reads.
|
|
// For the majority of reads, this loop only execute once.
|
|
var copied int64
|
|
for _, buffer := range buffers {
|
|
toCopy := int64(len(buffer))
|
|
if copied+toCopy > size {
|
|
toCopy = size - copied
|
|
}
|
|
cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if int64(cp) != toCopy {
|
|
return 0, syserror.EIO
|
|
}
|
|
copied += toCopy
|
|
}
|
|
|
|
return copied, nil
|
|
}
|
|
|
|
// Read implements vfs.FileDescriptionImpl.Read.
|
|
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
|
|
fd.offMu.Lock()
|
|
n, err := fd.PRead(ctx, dst, fd.off, opts)
|
|
fd.off += n
|
|
fd.offMu.Unlock()
|
|
return n, err
|
|
}
|
|
|
|
// PWrite implements vfs.FileDescriptionImpl.PWrite.
|
|
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
|
|
n, _, err := fd.pwrite(ctx, src, offset, opts)
|
|
return n, err
|
|
}
|
|
|
|
// Write implements vfs.FileDescriptionImpl.Write.
|
|
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
|
|
fd.offMu.Lock()
|
|
n, off, err := fd.pwrite(ctx, src, fd.off, opts)
|
|
fd.off = off
|
|
fd.offMu.Unlock()
|
|
return n, err
|
|
}
|
|
|
|
// pwrite returns the number of bytes written, final offset and error. The
|
|
// final offset should be ignored by PWrite.
|
|
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
|
|
if offset < 0 {
|
|
return 0, offset, syserror.EINVAL
|
|
}
|
|
|
|
// Check that flags are supported.
|
|
//
|
|
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
|
|
if opts.Flags&^linux.RWF_HIPRI != 0 {
|
|
return 0, offset, syserror.EOPNOTSUPP
|
|
}
|
|
|
|
inode := fd.inode()
|
|
inode.metadataMu.Lock()
|
|
defer inode.metadataMu.Unlock()
|
|
|
|
// If the file is opened with O_APPEND, update offset to file size.
|
|
// Note: since our Open() implements the interface of kernfs,
|
|
// and kernfs currently does not support O_APPEND, this will never
|
|
// be true before we switch out from kernfs.
|
|
if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
|
|
// Locking inode.metadataMu is sufficient for reading size
|
|
offset = int64(inode.size)
|
|
}
|
|
|
|
srclen := src.NumBytes()
|
|
|
|
if srclen > math.MaxUint32 {
|
|
// FUSE only supports uint32 for size.
|
|
// Overflow.
|
|
return 0, offset, syserror.EINVAL
|
|
}
|
|
if end := offset + srclen; end < offset {
|
|
// Overflow.
|
|
return 0, offset, syserror.EINVAL
|
|
}
|
|
|
|
srclen, err = vfs.CheckLimit(ctx, offset, srclen)
|
|
if err != nil {
|
|
return 0, offset, err
|
|
}
|
|
|
|
if srclen == 0 {
|
|
// Return before causing any side effects.
|
|
return 0, offset, nil
|
|
}
|
|
|
|
src = src.TakeFirst64(srclen)
|
|
|
|
// TODO(gvisor.dev/issue/3237): Add cache support:
|
|
// buffer cache. Ideally we write from src to our buffer cache first.
|
|
// The slice passed to fs.Write() should be a slice from buffer cache.
|
|
data := make([]byte, srclen)
|
|
// Reason for making a copy here: connection.Call() blocks on kerneltask,
|
|
// which in turn acquires mm.activeMu lock. Functions like CopyInTo() will
|
|
// attemp to acquire the mm.activeMu lock as well -> deadlock.
|
|
// We must finish reading from the userspace memory before
|
|
// t.Block() deactivates it.
|
|
cp, err := src.CopyIn(ctx, data)
|
|
if err != nil {
|
|
return 0, offset, err
|
|
}
|
|
if int64(cp) != srclen {
|
|
return 0, offset, syserror.EIO
|
|
}
|
|
|
|
n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
|
|
if err != nil {
|
|
return 0, offset, err
|
|
}
|
|
|
|
if n == 0 {
|
|
// We have checked srclen != 0 previously.
|
|
// If err == nil, then it's a short write and we return EIO.
|
|
return 0, offset, syserror.EIO
|
|
}
|
|
|
|
written = int64(n)
|
|
finalOff = offset + written
|
|
|
|
if finalOff > int64(inode.size) {
|
|
atomic.StoreUint64(&inode.size, uint64(finalOff))
|
|
atomic.AddUint64(&inode.fs.conn.attributeVersion, 1)
|
|
}
|
|
|
|
return
|
|
}
|