gvisor/pkg/sentry/syscalls/linux/sys_write.go

365 lines
9.2 KiB
Go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linux
import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
// LINT.IfChange
const (
// EventMaskWrite contains events that can be triggered on writes.
//
// Note that EventHUp is not going to happen for pipes but may for
// implementations of poll on some sockets, see net/core/datagram.c.
EventMaskWrite = waiter.EventOut | waiter.EventHUp | waiter.EventErr
)
// Write implements linux syscall write(2).
func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Check that the file is writable.
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
// Check that the size is legitimate.
si := int(size)
if si < 0 {
return 0, nil, syserror.EINVAL
}
// Get the source of the write.
src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, nil, err
}
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "write", file)
}
// Pwrite64 implements linux syscall pwrite64(2).
func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
offset := args[3].Int64()
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Check that the offset is legitimate and does not overflow.
if offset < 0 || offset+int64(size) < 0 {
return 0, nil, syserror.EINVAL
}
// Is writing at an offset supported?
if !file.Flags().Pwrite {
return 0, nil, syserror.ESPIPE
}
// Check that the file is writable.
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
// Check that the size is legitimate.
si := int(size)
if si < 0 {
return 0, nil, syserror.EINVAL
}
// Get the source of the write.
src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, nil, err
}
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwrite64", file)
}
// Writev implements linux syscall writev(2).
func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Check that the file is writable.
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
// Read the iovecs that specify the source of the write.
src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, nil, err
}
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "writev", file)
}
// Pwritev implements linux syscall pwritev(2).
func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Check that the offset is legitimate.
if offset < 0 {
return 0, nil, syserror.EINVAL
}
// Is writing at an offset supported?
if !file.Flags().Pwrite {
return 0, nil, syserror.ESPIPE
}
// Check that the file is writable.
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
// Read the iovecs that specify the source of the write.
src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, nil, err
}
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev", file)
}
// Pwritev2 implements linux syscall pwritev2(2).
func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
// While the syscall is
// pwritev2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
// the linux internal call
// (https://elixir.bootlin.com/linux/v4.18/source/fs/read_write.c#L1354)
// splits the offset argument into a high/low value for compatibility with
// 32-bit architectures. The flags argument is the 5th argument.
fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
flags := int(args[5].Int())
if int(args[4].Int())&0x4 == 1 {
return 0, nil, syserror.EACCES
}
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Check that the offset is legitimate.
if offset < -1 {
return 0, nil, syserror.EINVAL
}
// Is writing at an offset supported?
if offset > -1 && !file.Flags().Pwrite {
return 0, nil, syserror.ESPIPE
}
// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
// accepted as a valid flag argument for pwritev2.
if flags&^linux.RWF_VALID != 0 {
return uintptr(flags), nil, syserror.EOPNOTSUPP
}
// Check that the file is writeable.
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
// Read the iovecs that specify the source of the write.
src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, nil, err
}
// If pwritev2 is called with an offset of -1, writev is called.
if offset == -1 {
n, err := writev(t, file, src)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
}
n, err := pwritev(t, file, src, offset)
t.IOUsage().AccountWriteSyscall(n)
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
}
func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
n, err := f.Writev(t, src)
if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
}
return n, err
}
// Sockets support write timeouts.
var haveDeadline bool
var deadline ktime.Time
if s, ok := f.FileOperations.(socket.Socket); ok {
dl := s.SendTimeout()
if dl < 0 && err == syserror.ErrWouldBlock {
return n, err
}
if dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
}
}
// Register for notifications.
w, ch := waiter.NewChannelEntry(nil)
f.EventRegister(&w, EventMaskWrite)
total := n
for {
// Shorten src to reflect bytes previously written.
src = src.DropFirst64(n)
// Issue the request and break out if it completes with
// anything other than "would block".
n, err = f.Writev(t, src)
total += n
if err != syserror.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
if err == syserror.ETIMEDOUT {
err = syserror.ErrWouldBlock
}
break
}
}
f.EventUnregister(&w)
if total > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
}
return total, err
}
func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
n, err := f.Pwritev(t, src, offset)
if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
if n > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
}
return n, err
}
// Register for notifications.
w, ch := waiter.NewChannelEntry(nil)
f.EventRegister(&w, EventMaskWrite)
total := n
for {
// Shorten src to reflect bytes previously written.
src = src.DropFirst64(n)
// Issue the request and break out if it completes with
// anything other than "would block".
n, err = f.Pwritev(t, src, offset+total)
total += n
if err != syserror.ErrWouldBlock {
break
}
// Wait for a notification that we should retry.
if err = t.Block(ch); err != nil {
break
}
}
f.EventUnregister(&w)
if total > 0 {
// Queue notification if we wrote anything.
f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
}
return total, err
}
// LINT.ThenChange(vfs2/read_write.go)