gvisor/pkg/sentry/fsimpl/tmpfs/regular_file.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tmpfs

import (
	"io"
	"math"
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/safemem"
	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
	"gvisor.dev/gvisor/pkg/sentry/memmap"
	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
	"gvisor.dev/gvisor/pkg/sentry/usage"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
	"gvisor.dev/gvisor/pkg/sync"
	"gvisor.dev/gvisor/pkg/syserror"
	"gvisor.dev/gvisor/pkg/usermem"
)
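
// regularFile is a regular (S_IFREG) tmpfs file.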
type regularFile struct {
	inode inode

	// memFile is a platform.File used to allocate pages to this regularFile.
	memFile *pgalloc.MemoryFile

	// mu protects the fields below.
	mu sync.RWMutex

	// data maps offsets into the file to offsets into memFile that store
	// the file's data.
	data fsutil.FileRangeSet

	// size is the size of data, but accessed using atomic memory
	// operations to avoid locking in inode.stat().
	size uint64

	// seals represents file seals on this inode.
	seals uint32
}
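
// newRegularFile creates a new regular file with the given mode and
// credentials, and returns its inode.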
func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
	file := &regularFile{
		memFile: fs.memFile,
	}
	file.inode.init(file, fs, creds, mode)
	file.inode.nlink = 1 // from parent directory
	return &file.inode
}

// truncate grows or shrinks the file to the given size. It returns true if the
// file size was updated.
func (rf *regularFile) truncate(size uint64) (bool, error) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	if size == rf.size {
		// Nothing to do.
		return false, nil
	}

	if size > rf.size {
		// Growing the file.
		if rf.seals&linux.F_SEAL_GROW != 0 {
			// Seal does not allow growth.
			return false, syserror.EPERM
		}
		rf.size = size
		return true, nil
	}

	// Shrinking the file.
	if rf.seals&linux.F_SEAL_SHRINK != 0 {
		// Seal does not allow shrink.
		return false, syserror.EPERM
	}

	// TODO(gvisor.dev/issues/1197): Invalidate mappings once we have
	// mappings.
	rf.data.Truncate(size, rf.memFile)
	rf.size = size
	return true, nil
}
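
// regularFileFD implements vfs.FileDescriptionImpl for regular tmpfs files.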
type regularFileFD struct {
	fileDescription

	// off is the file offset. off is accessed using atomic memory operations.
	// offMu serializes operations that may mutate off.
	off   int64
	offMu sync.Mutex
}

// Release implements vfs.FileDescriptionImpl.Release.
func (fd *regularFileFD) Release() {
	// noop
}

// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	if offset < 0 {
		return 0, syserror.EINVAL
	}
	if dst.NumBytes() == 0 {
		return 0, nil
	}
	f := fd.inode().impl.(*regularFile)
	rw := getRegularFileReadWriter(f, offset)
	n, err := dst.CopyOutFrom(ctx, rw)
	putRegularFileReadWriter(rw)
	return int64(n), err
}

// Read implements vfs.FileDescriptionImpl.Read.
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	fd.offMu.Lock()
	n, err := fd.PRead(ctx, dst, fd.off, opts)
	fd.off += n
	fd.offMu.Unlock()
	return n, err
}

// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	if offset < 0 {
		return 0, syserror.EINVAL
	}
	srclen := src.NumBytes()
	if srclen == 0 {
		return 0, nil
	}
	f := fd.inode().impl.(*regularFile)
	end := offset + srclen
	if end < offset {
		// Overflow.
		return 0, syserror.EFBIG
	}
	rw := getRegularFileReadWriter(f, offset)
	n, err := src.CopyInTo(ctx, rw)
	putRegularFileReadWriter(rw)
	return n, err
}

// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	fd.offMu.Lock()
	n, err := fd.PWrite(ctx, src, fd.off, opts)
	fd.off += n
	fd.offMu.Unlock()
	return n, err
}

// Seek implements vfs.FileDescriptionImpl.Seek.
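// For example, Seek(ctx, 0, linux.SEEK_END) sets and returns an offset equal
// to the current file size.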
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
	fd.offMu.Lock()
	defer fd.offMu.Unlock()
	switch whence {
	case linux.SEEK_SET:
		// use offset as specified
	case linux.SEEK_CUR:
		offset += fd.off
	case linux.SEEK_END:
		offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
	default:
		return 0, syserror.EINVAL
	}
	if offset < 0 {
		return 0, syserror.EINVAL
	}
	fd.off = offset
	return offset, nil
}

// Sync implements vfs.FileDescriptionImpl.Sync.
func (fd *regularFileFD) Sync(ctx context.Context) error {
	// tmpfs is an in-memory filesystem, so there is nothing to sync.
	return nil
}

// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error {
	return fd.inode().lockBSD(uid, t, block)
}

// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error {
	fd.inode().unlockBSD(uid)
	return nil
}

// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error {
	return fd.inode().lockPOSIX(uid, t, rng, block)
}

// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error {
	fd.inode().unlockPOSIX(uid, rng)
	return nil
}

// regularFileReadWriter implements safemem.Reader and safemem.Writer.
type regularFileReadWriter struct {
	file *regularFile

	// Offset into the file to read/write at. Note that this may be
	// different from the FD offset if PRead/PWrite is used.
	off uint64
}
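
// regularFileReadWriterPool caches regularFileReadWriters so that hot
// read/write paths can avoid allocating one per call.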
var regularFileReadWriterPool = sync.Pool{
	New: func() interface{} {
		return &regularFileReadWriter{}
	},
}
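
// getRegularFileReadWriter returns a pooled regularFileReadWriter initialized
// to read from or write to file at offset.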
func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter {
	rw := regularFileReadWriterPool.Get().(*regularFileReadWriter)
	rw.file = file
	rw.off = uint64(offset)
	return rw
}
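
// putRegularFileReadWriter clears rw's file reference and returns rw to the
// pool.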
func putRegularFileReadWriter(rw *regularFileReadWriter) {
	rw.file = nil
	regularFileReadWriterPool.Put(rw)
}

// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
	rw.file.mu.RLock()

	// Compute the range to read (limited by file size and overflow-checked).
	if rw.off >= rw.file.size {
		rw.file.mu.RUnlock()
		return 0, io.EOF
	}
	end := rw.file.size
	if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
		end = rend
	}

	var done uint64
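	// Iterate the FileRangeSet: segments cover offsets backed by pages in
	// memFile, while gaps are holes that read as zeroes.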
	seg, gap := rw.file.data.Find(uint64(rw.off))
	for rw.off < end {
		mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
			if err != nil {
				rw.file.mu.RUnlock()
				return done, err
			}

			// Copy from internal mappings.
			n, err := safemem.CopySeq(dsts, ims)
			done += n
			rw.off += uint64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				rw.file.mu.RUnlock()
				return done, err
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Tmpfs holes are zero-filled.
			gapmr := gap.Range().Intersect(mr)
			dst := dsts.TakeFirst64(gapmr.Length())
			n, err := safemem.ZeroSeq(dst)
			done += n
			rw.off += uint64(n)
			dsts = dsts.DropFirst64(n)
			if err != nil {
				rw.file.mu.RUnlock()
				return done, err
			}

			// Continue.
			seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
		}
	}
	rw.file.mu.RUnlock()
	return done, nil
}

// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
	rw.file.mu.Lock()

	// Compute the range to write (overflow-checked).
	end := rw.off + srcs.NumBytes()
	if end <= rw.off {
		end = math.MaxInt64
	}

	// Check if seals prevent either file growth or all writes.
	switch {
	case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
		rw.file.mu.Unlock()
		return 0, syserror.EPERM
	case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
		// When growth is sealed, Linux effectively allows writes which would
		// normally grow the file to partially succeed up to the current EOF,
		// rounded down to the page boundary before the EOF.
		//
		// This happens because writes (and thus the growth check) for tmpfs
		// files proceed page-by-page on Linux, and the final write to the page
		// containing EOF fails, resulting in a partial write up to the start of
		// that page.
		//
		// To emulate this behaviour, artificially truncate the write to the
		// start of the page containing the current EOF.
		//
		// See Linux, mm/filemap.c:generic_perform_write() and
		// mm/shmem.c:shmem_write_begin().
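		//
		// For example, assuming a 4 KiB page size, a grow-sealed file of
		// size 6144 bytes truncates this write to end at offset 4096 (the
		// start of the page containing EOF), so a write starting at offset
		// 1024 can copy at most 3072 bytes.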
		if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart {
			end = pgstart
		}
		if end <= rw.off {
			// Truncation would result in no data being written.
			rw.file.mu.Unlock()
			return 0, syserror.EPERM
		}
	}

	// Page-aligned mr for when we need to allocate memory. RoundUp can't
	// overflow since end is an int64.
	pgstartaddr := usermem.Addr(rw.off).RoundDown()
	pgendaddr, _ := usermem.Addr(end).RoundUp()
	pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}

	var (
		done   uint64
		retErr error
	)
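	// As in ReadToBlocks, walk segments and gaps; a gap here means no pages
	// are allocated for that range yet, so allocate before copying.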
	seg, gap := rw.file.data.Find(uint64(rw.off))
	for rw.off < end {
		mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
		switch {
		case seg.Ok():
			// Get internal mappings.
			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Copy to internal mappings.
			n, err := safemem.CopySeq(ims, srcs)
			done += n
			rw.off += uint64(n)
			srcs = srcs.DropFirst64(n)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Continue.
			seg, gap = seg.NextNonEmpty()

		case gap.Ok():
			// Allocate memory for the write.
			gapMR := gap.Range().Intersect(pgMR)
			fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs)
			if err != nil {
				retErr = err
				goto exitLoop
			}

			// Write to that memory as usual.
			seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
		}
	}
exitLoop:
	// If the write ends beyond the file's previous size, it causes the
	// file to grow.
	if rw.off > rw.file.size {
		atomic.StoreUint64(&rw.file.size, rw.off)
	}
	rw.file.mu.Unlock()
	return done, retErr
}