2020-01-06 20:51:35 +00:00
|
|
|
// Copyright 2019 The gVisor Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package tmpfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
|
|
|
"math"
|
|
|
|
"sync/atomic"
|
|
|
|
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/safemem"
|
2020-01-06 20:51:35 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/memmap"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/usage"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
2020-01-10 06:00:42 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sync"
|
2020-01-06 20:51:35 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/usermem"
|
2020-01-06 20:51:35 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type regularFile struct {
|
|
|
|
inode inode
|
|
|
|
|
|
|
|
// memFile is a platform.File used to allocate pages to this regularFile.
|
|
|
|
memFile *pgalloc.MemoryFile
|
|
|
|
|
|
|
|
// mu protects the fields below.
|
|
|
|
mu sync.RWMutex
|
|
|
|
|
|
|
|
// data maps offsets into the file to offsets into memFile that store
|
|
|
|
// the file's data.
|
|
|
|
data fsutil.FileRangeSet
|
|
|
|
|
|
|
|
// size is the size of data, but accessed using atomic memory
|
|
|
|
// operations to avoid locking in inode.stat().
|
|
|
|
size uint64
|
|
|
|
|
|
|
|
// seals represents file seals on this inode.
|
|
|
|
seals uint32
|
|
|
|
}
|
|
|
|
|
|
|
|
func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
|
|
|
|
file := ®ularFile{
|
|
|
|
memFile: fs.memFile,
|
|
|
|
}
|
|
|
|
file.inode.init(file, fs, creds, mode)
|
|
|
|
file.inode.nlink = 1 // from parent directory
|
|
|
|
return &file.inode
|
|
|
|
}
|
|
|
|
|
2020-01-16 21:58:25 +00:00
|
|
|
// truncate grows or shrinks the file to the given size. It returns true if the
|
|
|
|
// file size was updated.
|
|
|
|
func (rf *regularFile) truncate(size uint64) (bool, error) {
|
|
|
|
rf.mu.Lock()
|
|
|
|
defer rf.mu.Unlock()
|
|
|
|
|
|
|
|
if size == rf.size {
|
|
|
|
// Nothing to do.
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if size > rf.size {
|
|
|
|
// Growing the file.
|
|
|
|
if rf.seals&linux.F_SEAL_GROW != 0 {
|
|
|
|
// Seal does not allow growth.
|
|
|
|
return false, syserror.EPERM
|
|
|
|
}
|
|
|
|
rf.size = size
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shrinking the file
|
|
|
|
if rf.seals&linux.F_SEAL_SHRINK != 0 {
|
|
|
|
// Seal does not allow shrink.
|
|
|
|
return false, syserror.EPERM
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(gvisor.dev/issues/1197): Invalidate mappings once we have
|
|
|
|
// mappings.
|
|
|
|
|
|
|
|
rf.data.Truncate(size, rf.memFile)
|
|
|
|
rf.size = size
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2020-01-06 20:51:35 +00:00
|
|
|
type regularFileFD struct {
|
|
|
|
fileDescription
|
|
|
|
|
|
|
|
// off is the file offset. off is accessed using atomic memory operations.
|
|
|
|
// offMu serializes operations that may mutate off.
|
|
|
|
off int64
|
|
|
|
offMu sync.Mutex
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release implements vfs.FileDescriptionImpl.Release.
|
|
|
|
func (fd *regularFileFD) Release() {
|
2020-01-22 20:27:16 +00:00
|
|
|
// noop
|
2020-01-06 20:51:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// PRead implements vfs.FileDescriptionImpl.PRead.
|
|
|
|
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
|
|
|
|
if offset < 0 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
if dst.NumBytes() == 0 {
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
f := fd.inode().impl.(*regularFile)
|
|
|
|
rw := getRegularFileReadWriter(f, offset)
|
|
|
|
n, err := dst.CopyOutFrom(ctx, rw)
|
|
|
|
putRegularFileReadWriter(rw)
|
|
|
|
return int64(n), err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read implements vfs.FileDescriptionImpl.Read.
|
|
|
|
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
|
|
|
|
fd.offMu.Lock()
|
|
|
|
n, err := fd.PRead(ctx, dst, fd.off, opts)
|
|
|
|
fd.off += n
|
|
|
|
fd.offMu.Unlock()
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// PWrite implements vfs.FileDescriptionImpl.PWrite.
|
|
|
|
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
|
|
|
|
if offset < 0 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
srclen := src.NumBytes()
|
|
|
|
if srclen == 0 {
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
f := fd.inode().impl.(*regularFile)
|
|
|
|
end := offset + srclen
|
|
|
|
if end < offset {
|
|
|
|
// Overflow.
|
|
|
|
return 0, syserror.EFBIG
|
|
|
|
}
|
|
|
|
rw := getRegularFileReadWriter(f, offset)
|
|
|
|
n, err := src.CopyInTo(ctx, rw)
|
|
|
|
putRegularFileReadWriter(rw)
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write implements vfs.FileDescriptionImpl.Write.
|
|
|
|
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
|
|
|
|
fd.offMu.Lock()
|
|
|
|
n, err := fd.PWrite(ctx, src, fd.off, opts)
|
|
|
|
fd.off += n
|
|
|
|
fd.offMu.Unlock()
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Seek implements vfs.FileDescriptionImpl.Seek.
|
|
|
|
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
|
|
|
|
fd.offMu.Lock()
|
|
|
|
defer fd.offMu.Unlock()
|
|
|
|
switch whence {
|
|
|
|
case linux.SEEK_SET:
|
|
|
|
// use offset as specified
|
|
|
|
case linux.SEEK_CUR:
|
|
|
|
offset += fd.off
|
|
|
|
case linux.SEEK_END:
|
|
|
|
offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
|
|
|
|
default:
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
if offset < 0 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
fd.off = offset
|
|
|
|
return offset, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sync implements vfs.FileDescriptionImpl.Sync.
|
|
|
|
func (fd *regularFileFD) Sync(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// regularFileReadWriter implements safemem.Reader and Safemem.Writer.
|
|
|
|
type regularFileReadWriter struct {
|
|
|
|
file *regularFile
|
|
|
|
|
|
|
|
// Offset into the file to read/write at. Note that this may be
|
|
|
|
// different from the FD offset if PRead/PWrite is used.
|
|
|
|
off uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
var regularFileReadWriterPool = sync.Pool{
|
|
|
|
New: func() interface{} {
|
|
|
|
return ®ularFileReadWriter{}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter {
|
|
|
|
rw := regularFileReadWriterPool.Get().(*regularFileReadWriter)
|
|
|
|
rw.file = file
|
|
|
|
rw.off = uint64(offset)
|
|
|
|
return rw
|
|
|
|
}
|
|
|
|
|
|
|
|
func putRegularFileReadWriter(rw *regularFileReadWriter) {
|
|
|
|
rw.file = nil
|
|
|
|
regularFileReadWriterPool.Put(rw)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadToBlocks implements safemem.Reader.ReadToBlocks.
|
|
|
|
func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
|
|
|
|
rw.file.mu.RLock()
|
|
|
|
|
|
|
|
// Compute the range to read (limited by file size and overflow-checked).
|
|
|
|
if rw.off >= rw.file.size {
|
|
|
|
rw.file.mu.RUnlock()
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
|
|
|
end := rw.file.size
|
|
|
|
if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
|
|
|
|
end = rend
|
|
|
|
}
|
|
|
|
|
|
|
|
var done uint64
|
|
|
|
seg, gap := rw.file.data.Find(uint64(rw.off))
|
|
|
|
for rw.off < end {
|
|
|
|
mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
|
|
|
|
switch {
|
|
|
|
case seg.Ok():
|
|
|
|
// Get internal mappings.
|
|
|
|
ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
|
|
|
|
if err != nil {
|
|
|
|
rw.file.mu.RUnlock()
|
|
|
|
return done, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy from internal mappings.
|
|
|
|
n, err := safemem.CopySeq(dsts, ims)
|
|
|
|
done += n
|
|
|
|
rw.off += uint64(n)
|
|
|
|
dsts = dsts.DropFirst64(n)
|
|
|
|
if err != nil {
|
|
|
|
rw.file.mu.RUnlock()
|
|
|
|
return done, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Continue.
|
|
|
|
seg, gap = seg.NextNonEmpty()
|
|
|
|
|
|
|
|
case gap.Ok():
|
|
|
|
// Tmpfs holes are zero-filled.
|
|
|
|
gapmr := gap.Range().Intersect(mr)
|
|
|
|
dst := dsts.TakeFirst64(gapmr.Length())
|
|
|
|
n, err := safemem.ZeroSeq(dst)
|
|
|
|
done += n
|
|
|
|
rw.off += uint64(n)
|
|
|
|
dsts = dsts.DropFirst64(n)
|
|
|
|
if err != nil {
|
|
|
|
rw.file.mu.RUnlock()
|
|
|
|
return done, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Continue.
|
|
|
|
seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rw.file.mu.RUnlock()
|
|
|
|
return done, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
|
|
|
|
func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
|
|
|
|
rw.file.mu.Lock()
|
|
|
|
|
|
|
|
// Compute the range to write (overflow-checked).
|
|
|
|
end := rw.off + srcs.NumBytes()
|
|
|
|
if end <= rw.off {
|
|
|
|
end = math.MaxInt64
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if seals prevent either file growth or all writes.
|
|
|
|
switch {
|
|
|
|
case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
|
|
|
|
rw.file.mu.Unlock()
|
|
|
|
return 0, syserror.EPERM
|
|
|
|
case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
|
|
|
|
// When growth is sealed, Linux effectively allows writes which would
|
|
|
|
// normally grow the file to partially succeed up to the current EOF,
|
|
|
|
// rounded down to the page boundary before the EOF.
|
|
|
|
//
|
|
|
|
// This happens because writes (and thus the growth check) for tmpfs
|
|
|
|
// files proceed page-by-page on Linux, and the final write to the page
|
|
|
|
// containing EOF fails, resulting in a partial write up to the start of
|
|
|
|
// that page.
|
|
|
|
//
|
|
|
|
// To emulate this behaviour, artifically truncate the write to the
|
|
|
|
// start of the page containing the current EOF.
|
|
|
|
//
|
|
|
|
// See Linux, mm/filemap.c:generic_perform_write() and
|
|
|
|
// mm/shmem.c:shmem_write_begin().
|
|
|
|
if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart {
|
|
|
|
end = pgstart
|
|
|
|
}
|
|
|
|
if end <= rw.off {
|
|
|
|
// Truncation would result in no data being written.
|
|
|
|
rw.file.mu.Unlock()
|
|
|
|
return 0, syserror.EPERM
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Page-aligned mr for when we need to allocate memory. RoundUp can't
|
|
|
|
// overflow since end is an int64.
|
|
|
|
pgstartaddr := usermem.Addr(rw.off).RoundDown()
|
|
|
|
pgendaddr, _ := usermem.Addr(end).RoundUp()
|
|
|
|
pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}
|
|
|
|
|
|
|
|
var (
|
|
|
|
done uint64
|
|
|
|
retErr error
|
|
|
|
)
|
|
|
|
seg, gap := rw.file.data.Find(uint64(rw.off))
|
|
|
|
for rw.off < end {
|
|
|
|
mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
|
|
|
|
switch {
|
|
|
|
case seg.Ok():
|
|
|
|
// Get internal mappings.
|
|
|
|
ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
|
|
|
|
if err != nil {
|
|
|
|
retErr = err
|
|
|
|
goto exitLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy to internal mappings.
|
|
|
|
n, err := safemem.CopySeq(ims, srcs)
|
|
|
|
done += n
|
|
|
|
rw.off += uint64(n)
|
|
|
|
srcs = srcs.DropFirst64(n)
|
|
|
|
if err != nil {
|
|
|
|
retErr = err
|
|
|
|
goto exitLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Continue.
|
|
|
|
seg, gap = seg.NextNonEmpty()
|
|
|
|
|
|
|
|
case gap.Ok():
|
|
|
|
// Allocate memory for the write.
|
|
|
|
gapMR := gap.Range().Intersect(pgMR)
|
|
|
|
fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs)
|
|
|
|
if err != nil {
|
|
|
|
retErr = err
|
|
|
|
goto exitLoop
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write to that memory as usual.
|
|
|
|
seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
exitLoop:
|
|
|
|
// If the write ends beyond the file's previous size, it causes the
|
|
|
|
// file to grow.
|
|
|
|
if rw.off > rw.file.size {
|
|
|
|
atomic.StoreUint64(&rw.file.size, rw.off)
|
|
|
|
}
|
|
|
|
|
|
|
|
rw.file.mu.Unlock()
|
|
|
|
return done, retErr
|
|
|
|
}
|