gvisor/pkg/sentry/syscalls/linux/sys_splice.go

299 lines
8.2 KiB
Go
Raw Normal View History

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/kdefs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
// doSplice implements a blocking splice operation.
//
// It calls fs.Splice repeatedly to move up to opts.Length bytes from inFile
// to outFile. The loop only retries when fs.Splice reports
// syserror.ErrWouldBlock; any other result (including a nil error with a
// short count) ends the operation. If nonBlocking is set, or if both files
// have their NonBlocking flag set, ErrWouldBlock is returned to the caller
// instead of waiting.
//
// The returned count is the total number of bytes moved across all attempts,
// and the returned error is the one that terminated the loop (which may be an
// error from t.Block if the wait was interrupted).
func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) {
	var (
		total int64
		n     int64
		err   error
		ch    chan struct{}
		inW   bool
		outW  bool
	)
	for opts.Length > 0 {
		n, err = fs.Splice(t, outFile, inFile, opts)
		opts.Length -= n
		total += n

		// fs.Splice receives opts by value, so it cannot advance explicit
		// offsets on our behalf. Move them past what was just transferred;
		// otherwise a retry after a partial transfer would re-read (or
		// overwrite) the same range.
		if opts.SrcOffset {
			opts.SrcStart += n
		}
		if opts.DstOffset {
			opts.DstStart += n
		}

		// Only ErrWouldBlock is retried, and only when we are allowed to
		// block.
		if err != syserror.ErrWouldBlock || nonBlocking {
			break
		}

		// Are we a registered waiter?
		if ch == nil {
			ch = make(chan struct{}, 1)
		}
		// At most one side is registered per iteration; each side is
		// registered at most once, so each defer below runs at most once.
		if !inW && !inFile.Flags().NonBlocking {
			w, _ := waiter.NewChannelEntry(ch)
			inFile.EventRegister(&w, EventMaskRead)
			defer inFile.EventUnregister(&w)
			inW = true // Registered.
		} else if !outW && !outFile.Flags().NonBlocking {
			w, _ := waiter.NewChannelEntry(ch)
			outFile.EventRegister(&w, EventMaskWrite)
			defer outFile.EventUnregister(&w)
			outW = true // Registered.
		}

		// Was anything registered? If no, everything is non-blocking.
		if !inW && !outW {
			break
		}

		// NOTE(review): if only one side is registered and it reports
		// ready while the unregistered (non-blocking) side keeps causing
		// ErrWouldBlock, this appears to retry without ever blocking;
		// confirm whether that can happen in practice.
		if (!inW || inFile.Readiness(EventMaskRead) != 0) && (!outW || outFile.Readiness(EventMaskWrite) != 0) {
			// Something became ready, try again without blocking.
			continue
		}

		// Block until there's data.
		if err = t.Block(ch); err != nil {
			break
		}
	}

	return total, err
}
// Sendfile implements linux system call sendfile(2).
//
// Arguments are, in order: the output file descriptor, the input file
// descriptor, a user pointer to an optional int64 offset, and the byte count.
//
// When the offset pointer is non-null, data is read starting at the given
// offset and the updated offset (start + bytes sent) is written back to user
// memory. Otherwise the splice is performed without an explicit source
// offset.
//
// Errors returned directly by this function:
//   - EINVAL: negative count, negative offset, non-regular input file, or an
//     output file opened with the append flag.
//   - EBADF: invalid descriptor, input not open for reading, or output not
//     open for writing.
//   - ESPIPE: an offset was supplied but the input file does not allow
//     positional reads.
func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	outFD := kdefs.FD(args[0].Int())
	inFD := kdefs.FD(args[1].Int())
	offsetAddr := args[2].Pointer()
	count := int64(args[3].SizeT())

	// Don't send a negative number of bytes.
	if count < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get files.
	outFile := t.FDMap().GetFile(outFD)
	if outFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer outFile.DecRef()

	inFile := t.FDMap().GetFile(inFD)
	if inFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer inFile.DecRef()

	// Access-mode failures are EBADF and must be reported before the append
	// check below, so that a descriptor with the wrong access mode is never
	// misreported as EINVAL.
	if !inFile.Flags().Read {
		return 0, nil, syserror.EBADF
	}
	if !outFile.Flags().Write {
		return 0, nil, syserror.EBADF
	}

	// Verify that the outfile Append flag is not set. sendfile(2) documents
	// EINVAL (not EBADF) for an output descriptor opened with O_APPEND.
	if outFile.Flags().Append {
		return 0, nil, syserror.EINVAL
	}

	// Verify that we have a regular infile. This is a requirement; the
	// same check appears in Linux (fs/splice.c:splice_direct_to_actor).
	if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) {
		return 0, nil, syserror.EINVAL
	}

	var (
		n   int64
		err error
	)
	if offsetAddr != 0 {
		// Verify that when offset address is not null, infile must be
		// seekable.
		if !inFile.Flags().Pread {
			return 0, nil, syserror.ESPIPE
		}

		// Copy in the offset.
		var offset int64
		if _, err := t.CopyIn(offsetAddr, &offset); err != nil {
			return 0, nil, err
		}

		// The offset must be valid.
		if offset < 0 {
			return 0, nil, syserror.EINVAL
		}

		// Do the splice.
		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
			Length:    count,
			SrcOffset: true,
			SrcStart:  offset,
		}, false)

		// Copy out the new offset. This happens regardless of the splice
		// result; a copy-out failure takes precedence over it.
		if _, err := t.CopyOut(offsetAddr, n+offset); err != nil {
			return 0, nil, err
		}
	} else {
		// Send data using splice.
		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
			Length: count,
		}, false)
	}

	// We can only pass a single file to handleIOError, so pick inFile
	// arbitrarily. This is used only for debugging purposes.
	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile)
}
// Splice implements splice(2).
//
// Arguments are, in order: the input file descriptor, a user pointer to an
// optional input offset, the output file descriptor, a user pointer to an
// optional output offset, the byte count, and the flags.
//
// At least one of the two files must be a pipe, and an offset may only be
// supplied for the side that is not a pipe.
func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	var (
		inFD      = kdefs.FD(args[0].Int())
		inOffset  = args[1].Pointer()
		outFD     = kdefs.FD(args[2].Int())
		outOffset = args[3].Pointer()
		count     = int64(args[4].SizeT())
		flags     = args[5].Int()
	)

	// Reject any flag bits outside the known set.
	if unknown := flags &^ (linux.SPLICE_F_MOVE | linux.SPLICE_F_NONBLOCK | linux.SPLICE_F_MORE | linux.SPLICE_F_GIFT); unknown != 0 {
		return 0, nil, syserror.EINVAL
	}

	// Look up both files; each reference is dropped on return.
	outFile := t.FDMap().GetFile(outFD)
	if outFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer outFile.DecRef()

	inFile := t.FDMap().GetFile(inFD)
	if inFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer inFile.DecRef()

	// Build the splice options.
	//
	// Nothing internal requires one end to be a pipe, but the system call
	// semantics do, so it is enforced here.
	opts := fs.SpliceOpts{Length: count}
	inPipe := fs.IsPipe(inFile.Dirent.Inode.StableAttr)
	outPipe := fs.IsPipe(outFile.Dirent.Inode.StableAttr)

	switch {
	case !inPipe && !outPipe:
		// Neither end is a pipe.
		return 0, nil, syserror.EINVAL

	case inPipe && outPipe:
		// Pipes have no offsets.
		if inOffset != 0 || outOffset != 0 {
			return 0, nil, syserror.ESPIPE
		}

	case inPipe:
		// Pipe -> non-pipe: only a destination offset is allowed.
		if inOffset != 0 {
			return 0, nil, syserror.ESPIPE
		}
		if outOffset != 0 {
			var dstStart int64
			if _, err := t.CopyIn(outOffset, &dstStart); err != nil {
				return 0, nil, err
			}
			opts.DstOffset = true
			opts.DstStart = dstStart
		}

	default:
		// Non-pipe -> pipe: only a source offset is allowed.
		if outOffset != 0 {
			return 0, nil, syserror.ESPIPE
		}
		if inOffset != 0 {
			var srcStart int64
			if _, err := t.CopyIn(inOffset, &srcStart); err != nil {
				return 0, nil, err
			}
			opts.SrcOffset = true
			opts.SrcStart = srcStart
		}
	}

	// Both ends must not be the same pipe; otherwise it's a continuous loop.
	if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
		return 0, nil, syserror.EINVAL
	}

	// Of the accepted flags, only SPLICE_F_NONBLOCK changes behavior here.
	// Unlike in Linux, it is applied consistently: the operation is either
	// fully blocking or fully non-blocking, regardless of the underlying
	// files. It's unclear if this is a bug or not yet.
	nonBlocking := flags&linux.SPLICE_F_NONBLOCK != 0

	moved, err := doSplice(t, outFile, inFile, opts, nonBlocking)

	// handleIOError accepts a single file; inFile is chosen arbitrarily.
	return uintptr(moved), nil, handleIOError(t, moved != 0, err, kernel.ERESTARTSYS, "splice", inFile)
}
// Tee implements tee(2).
//
// Arguments are, in order: the input file descriptor, the output file
// descriptor, the byte count, and the flags. Both descriptors must refer to
// pipes, and they must not be the same pipe. The transfer is performed with
// the Dup splice option set.
func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	var (
		inFD  = kdefs.FD(args[0].Int())
		outFD = kdefs.FD(args[1].Int())
		count = int64(args[2].SizeT())
		flags = args[3].Int()
	)

	// Reject any flag bits outside the known set.
	if unknown := flags &^ (linux.SPLICE_F_MOVE | linux.SPLICE_F_NONBLOCK | linux.SPLICE_F_MORE | linux.SPLICE_F_GIFT); unknown != 0 {
		return 0, nil, syserror.EINVAL
	}

	// Look up both files; each reference is dropped on return.
	outFile := t.FDMap().GetFile(outFD)
	if outFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer outFile.DecRef()

	inFile := t.FDMap().GetFile(inFD)
	if inFile == nil {
		return 0, nil, syserror.EBADF
	}
	defer inFile.DecRef()

	// Both ends must be pipes.
	inAttr := inFile.Dirent.Inode.StableAttr
	outAttr := outFile.Dirent.Inode.StableAttr
	if !fs.IsPipe(inAttr) || !fs.IsPipe(outAttr) {
		return 0, nil, syserror.EINVAL
	}

	// Both ends must not be the same pipe.
	if inAttr.InodeID == outAttr.InodeID {
		return 0, nil, syserror.EINVAL
	}

	// Of the accepted flags, only SPLICE_F_NONBLOCK changes behavior here.
	nonBlocking := flags&linux.SPLICE_F_NONBLOCK != 0

	opts := fs.SpliceOpts{
		Length: count,
		Dup:    true,
	}
	moved, err := doSplice(t, outFile, inFile, opts, nonBlocking)

	// handleIOError accepts a single file; inFile is chosen arbitrarily.
	return uintptr(moved), nil, handleIOError(t, moved != 0, err, kernel.ERESTARTSYS, "tee", inFile)
}