diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index c3301f7c5..426003eb7 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -181,6 +181,57 @@ type Stat struct { // SizeOfStat is the size of a Stat struct. var SizeOfStat = binary.Size(Stat{}) +// Flags for statx. +const ( + AT_STATX_SYNC_TYPE = 0x6000 + AT_STATX_SYNC_AS_STAT = 0x0000 + AT_STATX_FORCE_SYNC = 0x2000 + AT_STATX_DONT_SYNC = 0x4000 +) + +// Mask values for statx. +const ( + STATX_TYPE = 0x00000001 + STATX_MODE = 0x00000002 + STATX_NLINK = 0x00000004 + STATX_UID = 0x00000008 + STATX_GID = 0x00000010 + STATX_ATIME = 0x00000020 + STATX_MTIME = 0x00000040 + STATX_CTIME = 0x00000080 + STATX_INO = 0x00000100 + STATX_SIZE = 0x00000200 + STATX_BLOCKS = 0x00000400 + STATX_BASIC_STATS = 0x000007ff + STATX_BTIME = 0x00000800 + STATX_ALL = 0x00000fff + STATX__RESERVED = 0x80000000 +) + +// Statx represents struct statx. +type Statx struct { + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + UID uint32 + GID uint32 + Mode uint16 + _ uint16 + Ino uint64 + Size uint64 + Blocks uint64 + AttributesMask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + RdevMajor uint32 + RdevMinor uint32 + DevMajor uint32 + DevMinor uint32 +} + // FileMode represents a mode_t. type FileMode uint diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go index fa9ee27e1..e727066d7 100644 --- a/pkg/abi/linux/time.go +++ b/pkg/abi/linux/time.go @@ -226,3 +226,18 @@ type Tms struct { // TimerID represents type timer_t, which identifies a POSIX per-process // interval timer. type TimerID int32 + +// StatxTimestamp represents struct statx_timestamp. +type StatxTimestamp struct { + Sec int64 + Nsec uint32 + _ int32 +} + +// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp. +func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) { + return StatxTimestamp{ + Sec: nsec / 1e9, + Nsec: uint32(nsec % 1e9), + } +} diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index 1d9d7454a..9fc6a5bc2 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -89,6 +89,28 @@ func (n InodeType) String() string { } } +// LinuxType returns the linux file type for this inode type. +func (n InodeType) LinuxType() uint32 { + switch n { + case RegularFile, SpecialFile: + return linux.ModeRegular + case Directory, SpecialDirectory: + return linux.ModeDirectory + case Symlink: + return linux.ModeSymlink + case Pipe: + return linux.ModeNamedPipe + case CharacterDevice: + return linux.ModeCharacterDevice + case BlockDevice: + return linux.ModeBlockDevice + case Socket: + return linux.ModeSocket + default: + return 0 + } +} + // StableAttr contains Inode attributes that will be stable throughout the // lifetime of the Inode. // diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 9c3c05239..aa6c75d25 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -142,6 +142,11 @@ func (t Time) Timeval() linux.Timeval { return linux.NsecToTimeval(t.Nanoseconds()) } +// StatxTimestamp converts Time to a Linux statx_timestamp. +func (t Time) StatxTimestamp() linux.StatxTimestamp { + return linux.NsecToStatxTimestamp(t.Nanoseconds()) +} + // Add adds the duration of d to t. func (t Time) Add(d time.Duration) Time { if t.ns > 0 && d.Nanoseconds() > math.MaxInt64-int64(t.ns) { diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 94fc18f27..2a41e8176 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -379,6 +379,7 @@ var AMD64 = &kernel.SyscallTable{ 326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil), 327: syscalls.Undocumented("preadv2", Preadv2), 328: syscalls.Undocumented("pwritev2", Pwritev2), + 397: syscalls.Undocumented("statx", Statx), }, Emulate: map[usermem.Addr]uintptr{ diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 8e4890af4..9a5657254 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -132,24 +132,6 @@ func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error { // t.CopyObjectOut has noticeable performance impact due to its many slice // allocations and use of reflection. func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs.UnstableAttr) error { - var mode uint32 - switch sattr.Type { - case fs.RegularFile, fs.SpecialFile: - mode |= linux.ModeRegular - case fs.Symlink: - mode |= linux.ModeSymlink - case fs.Directory, fs.SpecialDirectory: - mode |= linux.ModeDirectory - case fs.Pipe: - mode |= linux.ModeNamedPipe - case fs.CharacterDevice: - mode |= linux.ModeCharacterDevice - case fs.BlockDevice: - mode |= linux.ModeBlockDevice - case fs.Socket: - mode |= linux.ModeSocket - } - b := t.CopyScratchBuffer(int(linux.SizeOfStat))[:0] // Dev (uint64) @@ -159,7 +141,7 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs // Nlink (uint64) b = binary.AppendUint64(b, usermem.ByteOrder, uattr.Links) // Mode (uint32) - b = binary.AppendUint32(b, usermem.ByteOrder, mode|uint32(uattr.Perms.LinuxMode())) + b = binary.AppendUint32(b, usermem.ByteOrder, sattr.Type.LinuxType()|uint32(uattr.Perms.LinuxMode())) // UID (uint32) b = binary.AppendUint32(b, usermem.ByteOrder, uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) // GID (uint32) @@ -194,6 +176,98 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs return err } +// Statx implements linux syscall statx(2). +func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := kdefs.FD(args[0].Int()) + pathAddr := args[1].Pointer() + flags := args[2].Int() + mask := args[3].Uint() + statxAddr := args[4].Pointer() + + if mask&linux.STATX__RESERVED > 0 { + return 0, nil, syserror.EINVAL + } + if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { + return 0, nil, syserror.EINVAL + } + + path, dirPath, err := copyInPath(t, pathAddr, flags&linux.AT_EMPTY_PATH != 0) + if err != nil { + return 0, nil, err + } + + if path == "" { + file := t.FDMap().GetFile(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + uattr, err := file.UnstableAttr(t) + if err != nil { + return 0, nil, err + } + return 0, nil, statx(t, file.Dirent.Inode.StableAttr, uattr, statxAddr) + } + + resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0 + + return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + uattr, err := d.Inode.UnstableAttr(t) + if err != nil { + return err + } + return statx(t, d.Inode.StableAttr, uattr, statxAddr) + }) +} + +func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error { + // "[T]he kernel may return fields that weren't requested and may fail to + // return fields that were requested, depending on what the backing + // filesystem supports. + // [...] + // A filesystem may also fill in fields that the caller didn't ask for + // if it has values for them available and the information is available + // at no extra cost. If this happens, the corresponding bits will be + // set in stx_mask." -- statx(2) + // + // We fill in all the values we have (which currently does not include + // btime, see b/135608823), regardless of what the user asked for. The + // STATX_BASIC_STATS mask indicates that all fields are present except + // for btime. + + devMajor, devMinor := linux.DecodeDeviceID(uint32(sattr.DeviceID)) + s := linux.Statx{ + // TODO(b/135608823): Support btime, and then change this to + // STATX_ALL to indicate presence of btime. + Mask: linux.STATX_BASIC_STATS, + + // No attributes, and none supported. + Attributes: 0, + AttributesMask: 0, + + Blksize: uint32(sattr.BlockSize), + Nlink: uint32(uattr.Links), + UID: uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()), + GID: uint32(uattr.Owner.GID.In(t.UserNamespace()).OrOverflow()), + Mode: uint16(sattr.Type.LinuxType()) | uint16(uattr.Perms.LinuxMode()), + Ino: sattr.InodeID, + Size: uint64(uattr.Size), + Blocks: uint64(uattr.Usage) / 512, + Atime: uattr.AccessTime.StatxTimestamp(), + Ctime: uattr.StatusChangeTime.StatxTimestamp(), + Mtime: uattr.ModificationTime.StatxTimestamp(), + RdevMajor: uint32(sattr.DeviceFileMajor), + RdevMinor: sattr.DeviceFileMinor, + DevMajor: uint32(devMajor), + DevMinor: devMinor, + } + _, err := t.CopyOut(statxAddr, &s) + return err +} + // Statfs implements linux syscall statfs(2). func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { addr := args[0].Pointer() @@ -252,8 +326,6 @@ func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error { FragmentSize: d.Inode.StableAttr.BlockSize, // Leave other fields 0 like simple_statfs does. } - if _, err := t.CopyOut(addr, &statfs); err != nil { - return err - } - return nil + _, err = t.CopyOut(addr, &statfs) + return err } diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 80ba67496..0e914215d 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -16,7 +16,9 @@ #include #include #include +#include #include + #include #include @@ -554,6 +556,99 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { EXPECT_EQ(st2_after.st_ino, st2.st_ino); } +#ifndef SYS_statx +#if defined(__x86_64__) +#define SYS_statx 397 +#else +#error "Unknown architecture" +#endif +#endif // SYS_statx + +#ifndef STATX_ALL +#define STATX_ALL 0x00000fffU +#endif // STATX_ALL + +// struct kernel_statx_timestamp is a Linux statx_timestamp struct. +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +// struct kernel_statx is a Linux statx struct. Old versions of glibc do not +// expose it. See include/uapi/linux/stat.h +struct kernel_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct kernel_statx_timestamp stx_atime; + struct kernel_statx_timestamp stx_btime; + struct kernel_statx_timestamp stx_ctime; + struct kernel_statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare2[14]; +}; + +int statx(int dirfd, const char *pathname, int flags, unsigned int mask, + struct kernel_statx *statxbuf) { + return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf); +} + +TEST_F(StatTest, StatxAbsPath) { + SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathDirFD) { + SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + struct kernel_statx stx; + auto const dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + auto filename = std::string(Basename(test_file_name_)); + + EXPECT_THAT(statx(dirfd.get(), filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathCwd) { + SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + auto filename = std::string(Basename(test_file_name_)); + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxEmptyPath) { + SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + struct kernel_statx stx; + EXPECT_THAT(statx(fd.get(), "", AT_EMPTY_PATH, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + } // namespace } // namespace testing