Implement sync_file_range()

sync_file_range - sync a file segment with disk

In Linux, sync_file_range() accepts three flags:

       SYNC_FILE_RANGE_WAIT_BEFORE
              Wait  upon  write-out  of  all pages in the specified range that
              have already been submitted to the device driver  for  write-out
              before performing any write.

       SYNC_FILE_RANGE_WRITE
              Initiate write-out of all dirty pages in the specified range
              which are not presently submitted for write-out.  Note that
              even this may block if you attempt to write more than the
              request queue size.

       SYNC_FILE_RANGE_WAIT_AFTER
              Wait upon write-out of all pages in the range  after  performing
              any write.

In this implementation:

SYNC_FILE_RANGE_WAIT_BEFORE without SYNC_FILE_RANGE_WAIT_AFTER isn't
supported right now.

SYNC_FILE_RANGE_WRITE is skipped. It should initiate write-out of all
dirty pages, but it doesn't wait for them, so it should be safe to do
nothing as long as nobody uses SYNC_FILE_RANGE_WAIT_BEFORE.

SYNC_FILE_RANGE_WAIT_AFTER is equal to fdatasync(). In Linux,
sync_file_range() doesn't write out the file's metadata, but
fdatasync() does if the file size has changed.

PiperOrigin-RevId: 220730840
Change-Id: Iae5dfb23c2c916967d67cf1a1ad32f25eb3f6286
This commit is contained in:
Andrei Vagin 2018-11-08 17:38:50 -08:00 committed by Shentubot
parent 5a0be6fa20
commit 2ef122da35
3 changed files with 71 additions and 1 deletions

View File

@ -73,3 +73,10 @@ type Statfs struct {
// Spare is unused.
Spare [4]uint64
}
// Flags accepted by sync_file_range(2), from include/uapi/linux/fs.h.
//
// Each flag occupies its own bit, so they may be OR-ed together.
const (
	// SYNC_FILE_RANGE_WAIT_BEFORE waits upon write-out of all pages in the
	// range that have already been submitted for write-out, before
	// performing any write.
	SYNC_FILE_RANGE_WAIT_BEFORE = 1 << 0

	// SYNC_FILE_RANGE_WRITE initiates write-out of all dirty pages in the
	// range which are not presently submitted for write-out.
	SYNC_FILE_RANGE_WRITE = 1 << 1

	// SYNC_FILE_RANGE_WAIT_AFTER waits upon write-out of all pages in the
	// range after performing any write.
	SYNC_FILE_RANGE_WAIT_AFTER = 1 << 2
)

View File

@ -325,7 +325,7 @@ var AMD64 = &kernel.SyscallTable{
274: syscalls.Error(syscall.ENOSYS), // GetRobustList, obsolete
// 275: Splice, TODO
// 276: Tee, TODO
// 277: SyncFileRange, TODO
277: SyncFileRange,
// 278: Vmsplice, TODO
279: syscalls.CapError(linux.CAP_SYS_NICE), // MovePages, requires cap_sys_nice (mostly)
280: Utimensat,

View File

@ -15,6 +15,7 @@
package linux
import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
@ -73,3 +74,65 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncData)
return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
}
// SyncFileRange implements linux syscall sync_file_range(2).
//
// The syscall arguments are read as: args[0] the file descriptor, args[1]
// the starting offset, args[2] the number of bytes, and args[3] the flags.
//
// It returns EINVAL if offset is negative, if the end of the range lies
// before offset, or if flags has bits other than the three
// SYNC_FILE_RANGE_* flags set; EBADF if the descriptor does not refer to an
// open file; and ENOSYS if SYNC_FILE_RANGE_WAIT_BEFORE is requested without
// SYNC_FILE_RANGE_WAIT_AFTER.
func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	var err error

	offset := args[1].Int64()
	nbytes := args[2].Int64()
	uflags := args[3].Uint()

	// Reject a negative offset and any range whose end precedes its start
	// (a negative nbytes, or offset+nbytes wrapping around).
	if offset < 0 || offset+nbytes < offset {
		return 0, nil, syserror.EINVAL
	}

	if uflags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|
		linux.SYNC_FILE_RANGE_WRITE|
		linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 {
		return 0, nil, syserror.EINVAL
	}

	// Beyond the validation above, nbytes is not used: when a sync is
	// performed below it always extends from offset to fs.FileMaxOffset,
	// which covers the requested range whatever nbytes is (including
	// nbytes == 0).

	fd := kdefs.FD(args[0].Int())
	file := t.FDMap().GetFile(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// SYNC_FILE_RANGE_WAIT_BEFORE waits upon write-out of all pages in the
	// specified range that have already been submitted to the device
	// driver for write-out before performing any write.
	//
	// On its own (without SYNC_FILE_RANGE_WAIT_AFTER) it is not supported.
	if uflags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 &&
		uflags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 {
		t.Kernel().EmitUnimplementedEvent(t)
		return 0, nil, syserror.ENOSYS
	}

	// SYNC_FILE_RANGE_WRITE initiates write-out of all dirty pages in the
	// specified range which are not presently submitted for write-out.
	//
	// It looks impossible to implement this functionality without a
	// massive rework of the vfs subsystem. file.Fsync() takes a file lock
	// for the entire operation, so even if it is running in a goroutine,
	// it blocks other file operations instead of flushing data in the
	// background.
	//
	// It should be safe to skip this flag as long as nobody uses
	// SYNC_FILE_RANGE_WAIT_BEFORE.

	// SYNC_FILE_RANGE_WAIT_AFTER waits upon write-out of all pages in the
	// range after performing any write.
	//
	// In Linux, sync_file_range() doesn't write out the file's metadata,
	// but fdatasync() does if the file size has changed.
	if uflags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 {
		err = file.Fsync(t, offset, fs.FileMaxOffset, fs.SyncData)
	}

	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
}