VFS2 gofer client
Updates #1198
Opening host pipes (by spinning in fdpipe) and host sockets is not yet
complete, and will be done in a future CL.
Major differences from VFS1 gofer client (sentry/fs/gofer), with varying levels
of backportability:
- "Cache policies" are replaced by InteropMode, which controls the behavior of
timestamps in addition to caching. Under InteropModeExclusive (analogous to
cacheAll) and InteropModeWritethrough (analogous to cacheAllWritethrough),
client timestamps are *not* written back to the server (it is not possible in
9P or Linux for clients to set ctime, so writing back client-authoritative
timestamps results in incoherence between atime/mtime and ctime). Under
InteropModeShared (analogous to cacheRemoteRevalidating), client timestamps
are not used at all (remote filesystem clocks are authoritative). cacheNone
is translated to InteropModeShared + new option
filesystemOptions.specialRegularFiles.
- Under InteropModeShared, "unstable attribute" reloading for permission
checks, lookup, and revalidation are fused, which is feasible in VFS2 since
gofer.filesystem controls path resolution. This results in a ~33% reduction
in RPCs for filesystem operations compared to cacheRemoteRevalidating. For
example, consider stat("/foo/bar/baz") where "/foo/bar/baz" fails
revalidation, resulting in the instantiation of a new dentry:
VFS1 RPCs:
getattr("/") // fs.MountNamespace.FindLink() => fs.Inode.CheckPermission() => gofer.inodeOperations.check() => gofer.inodeOperations.UnstableAttr()
walkgetattr("/", "foo") = fid1 // fs.Dirent.walk() => gofer.session.Revalidate() => gofer.cachePolicy.Revalidate()
clunk(fid1)
getattr("/foo") // CheckPermission
walkgetattr("/foo", "bar") = fid2 // Revalidate
clunk(fid2)
getattr("/foo/bar") // CheckPermission
walkgetattr("/foo/bar", "baz") = fid3 // Revalidate
clunk(fid3)
walkgetattr("/foo/bar", "baz") = fid4 // fs.Dirent.walk() => gofer.inodeOperations.Lookup
getattr("/foo/bar/baz") // linux.stat() => gofer.inodeOperations.UnstableAttr()
VFS2 RPCs:
getattr("/") // gofer.filesystem.walkExistingLocked()
walkgetattr("/", "foo") = fid1 // gofer.filesystem.stepExistingLocked()
clunk(fid1)
// No getattr: walkgetattr already updated metadata for permission check
walkgetattr("/foo", "bar") = fid2
clunk(fid2)
walkgetattr("/foo/bar", "baz") = fid3
// No clunk: fid3 used for new gofer.dentry
// No getattr: walkgetattr already updated metadata for stat()
- gofer.filesystem.unlinkAt() does not require instantiation of a dentry that
represents the file to be deleted. Updates #898.
- gofer.regularFileFD.OnClose() skips Tflushf for regular files under
InteropModeExclusive, as it's nonsensical to request a remote file flush
without flushing locally-buffered writes to that remote file first.
- Symlink targets are cached when InteropModeShared is not in effect.
- p9.QID.Path (which is already required to be unique for each file within a
server, and is accordingly already synthesized from device/inode numbers in
all known gofers) is used as-is for inode numbers, rather than being mapped
along with attr.RDev in the client to yet another synthetic inode number.
- Relevant parts of fsutil.CachingInodeOperations are inlined directly into
gofer package code. This avoids having to duplicate part of its functionality
in fsutil.HostMappable.
PiperOrigin-RevId: 293190213
2020-02-04 19:28:36 +00:00
|
|
|
// Copyright 2019 The gVisor Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package gofer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
"sync/atomic"
|
|
|
|
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
|
|
"gvisor.dev/gvisor/pkg/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/p9"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
|
|
)
|
|
|
|
|
|
|
|
func (d *dentry) isDir() bool {
|
|
|
|
return d.fileType() == linux.S_IFDIR
|
|
|
|
}
|
|
|
|
|
|
|
|
// Preconditions: d.dirMu must be locked. d.isDir(). fs.opts.interop !=
|
|
|
|
// InteropModeShared.
|
|
|
|
func (d *dentry) cacheNegativeChildLocked(name string) {
|
|
|
|
if d.negativeChildren == nil {
|
|
|
|
d.negativeChildren = make(map[string]struct{})
|
|
|
|
}
|
|
|
|
d.negativeChildren[name] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
type directoryFD struct {
|
|
|
|
fileDescription
|
|
|
|
vfs.DirectoryFileDescriptionDefaultImpl
|
|
|
|
|
|
|
|
mu sync.Mutex
|
|
|
|
off int64
|
|
|
|
dirents []vfs.Dirent
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release implements vfs.FileDescriptionImpl.Release.
|
|
|
|
func (fd *directoryFD) Release() {
|
|
|
|
}
|
|
|
|
|
|
|
|
// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
|
|
|
|
func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
|
|
|
|
fd.mu.Lock()
|
|
|
|
defer fd.mu.Unlock()
|
|
|
|
|
|
|
|
if fd.dirents == nil {
|
|
|
|
ds, err := fd.dentry().getDirents(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
fd.dirents = ds
|
|
|
|
}
|
|
|
|
|
|
|
|
for fd.off < int64(len(fd.dirents)) {
|
2020-02-14 22:39:40 +00:00
|
|
|
if err := cb.Handle(fd.dirents[fd.off]); err != nil {
|
|
|
|
return err
|
VFS2 gofer client
Updates #1198
Opening host pipes (by spinning in fdpipe) and host sockets is not yet
complete, and will be done in a future CL.
Major differences from VFS1 gofer client (sentry/fs/gofer), with varying levels
of backportability:
- "Cache policies" are replaced by InteropMode, which control the behavior of
timestamps in addition to caching. Under InteropModeExclusive (analogous to
cacheAll) and InteropModeWritethrough (analogous to cacheAllWritethrough),
client timestamps are *not* written back to the server (it is not possible in
9P or Linux for clients to set ctime, so writing back client-authoritative
timestamps results in incoherence between atime/mtime and ctime). Under
InteropModeShared (analogous to cacheRemoteRevalidating), client timestamps
are not used at all (remote filesystem clocks are authoritative). cacheNone
is translated to InteropModeShared + new option
filesystemOptions.specialRegularFiles.
- Under InteropModeShared, "unstable attribute" reloading for permission
checks, lookup, and revalidation are fused, which is feasible in VFS2 since
gofer.filesystem controls path resolution. This results in a ~33% reduction
in RPCs for filesystem operations compared to cacheRemoteRevalidating. For
example, consider stat("/foo/bar/baz") where "/foo/bar/baz" fails
revalidation, resulting in the instantiation of a new dentry:
VFS1 RPCs:
getattr("/") // fs.MountNamespace.FindLink() => fs.Inode.CheckPermission() => gofer.inodeOperations.check() => gofer.inodeOperations.UnstableAttr()
walkgetattr("/", "foo") = fid1 // fs.Dirent.walk() => gofer.session.Revalidate() => gofer.cachePolicy.Revalidate()
clunk(fid1)
getattr("/foo") // CheckPermission
walkgetattr("/foo", "bar") = fid2 // Revalidate
clunk(fid2)
getattr("/foo/bar") // CheckPermission
walkgetattr("/foo/bar", "baz") = fid3 // Revalidate
clunk(fid3)
walkgetattr("/foo/bar", "baz") = fid4 // fs.Dirent.walk() => gofer.inodeOperations.Lookup
getattr("/foo/bar/baz") // linux.stat() => gofer.inodeOperations.UnstableAttr()
VFS2 RPCs:
getattr("/") // gofer.filesystem.walkExistingLocked()
walkgetattr("/", "foo") = fid1 // gofer.filesystem.stepExistingLocked()
clunk(fid1)
// No getattr: walkgetattr already updated metadata for permission check
walkgetattr("/foo", "bar") = fid2
clunk(fid2)
walkgetattr("/foo/bar", "baz") = fid3
// No clunk: fid3 used for new gofer.dentry
// No getattr: walkgetattr already updated metadata for stat()
- gofer.filesystem.unlinkAt() does not require instantiation of a dentry that
represents the file to be deleted. Updates #898.
- gofer.regularFileFD.OnClose() skips Tflushf for regular files under
InteropModeExclusive, as it's nonsensical to request a remote file flush
without flushing locally-buffered writes to that remote file first.
- Symlink targets are cached when InteropModeShared is not in effect.
- p9.QID.Path (which is already required to be unique for each file within a
server, and is accordingly already synthesized from device/inode numbers in
all known gofers) is used as-is for inode numbers, rather than being mapped
along with attr.RDev in the client to yet another synthetic inode number.
- Relevant parts of fsutil.CachingInodeOperations are inlined directly into
gofer package code. This avoids having to duplicate part of its functionality
in fsutil.HostMappable.
PiperOrigin-RevId: 293190213
2020-02-04 19:28:36 +00:00
|
|
|
}
|
|
|
|
fd.off++
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Preconditions: d.isDir(). There exists at least one directoryFD representing d.
|
|
|
|
func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
|
|
|
|
// 9P2000.L's readdir does not specify behavior in the presence of
|
|
|
|
// concurrent mutation of an iterated directory, so implementations may
|
|
|
|
// duplicate or omit entries in this case, which violates POSIX semantics.
|
|
|
|
// Thus we read all directory entries while holding d.dirMu to exclude
|
|
|
|
// directory mutations. (Note that it is impossible for the client to
|
|
|
|
// exclude concurrent mutation from other remote filesystem users. Since
|
|
|
|
// there is no way to detect if the server has incorrectly omitted
|
|
|
|
// directory entries, we simply assume that the server is well-behaved
|
|
|
|
// under InteropModeShared.) This is inconsistent with Linux (which appears
|
|
|
|
// to assume that directory fids have the correct semantics, and translates
|
|
|
|
// struct file_operations::readdir calls directly to readdir RPCs), but is
|
|
|
|
// consistent with VFS1.
|
2020-02-10 21:04:29 +00:00
|
|
|
//
|
|
|
|
// NOTE(b/135560623): In particular, some gofer implementations may not
|
|
|
|
// retain state between calls to Readdir, so may not provide a coherent
|
|
|
|
// directory stream across in the presence of mutation.
|
VFS2 gofer client
Updates #1198
Opening host pipes (by spinning in fdpipe) and host sockets is not yet
complete, and will be done in a future CL.
Major differences from VFS1 gofer client (sentry/fs/gofer), with varying levels
of backportability:
- "Cache policies" are replaced by InteropMode, which control the behavior of
timestamps in addition to caching. Under InteropModeExclusive (analogous to
cacheAll) and InteropModeWritethrough (analogous to cacheAllWritethrough),
client timestamps are *not* written back to the server (it is not possible in
9P or Linux for clients to set ctime, so writing back client-authoritative
timestamps results in incoherence between atime/mtime and ctime). Under
InteropModeShared (analogous to cacheRemoteRevalidating), client timestamps
are not used at all (remote filesystem clocks are authoritative). cacheNone
is translated to InteropModeShared + new option
filesystemOptions.specialRegularFiles.
- Under InteropModeShared, "unstable attribute" reloading for permission
checks, lookup, and revalidation are fused, which is feasible in VFS2 since
gofer.filesystem controls path resolution. This results in a ~33% reduction
in RPCs for filesystem operations compared to cacheRemoteRevalidating. For
example, consider stat("/foo/bar/baz") where "/foo/bar/baz" fails
revalidation, resulting in the instantiation of a new dentry:
VFS1 RPCs:
getattr("/") // fs.MountNamespace.FindLink() => fs.Inode.CheckPermission() => gofer.inodeOperations.check() => gofer.inodeOperations.UnstableAttr()
walkgetattr("/", "foo") = fid1 // fs.Dirent.walk() => gofer.session.Revalidate() => gofer.cachePolicy.Revalidate()
clunk(fid1)
getattr("/foo") // CheckPermission
walkgetattr("/foo", "bar") = fid2 // Revalidate
clunk(fid2)
getattr("/foo/bar") // CheckPermission
walkgetattr("/foo/bar", "baz") = fid3 // Revalidate
clunk(fid3)
walkgetattr("/foo/bar", "baz") = fid4 // fs.Dirent.walk() => gofer.inodeOperations.Lookup
getattr("/foo/bar/baz") // linux.stat() => gofer.inodeOperations.UnstableAttr()
VFS2 RPCs:
getattr("/") // gofer.filesystem.walkExistingLocked()
walkgetattr("/", "foo") = fid1 // gofer.filesystem.stepExistingLocked()
clunk(fid1)
// No getattr: walkgetattr already updated metadata for permission check
walkgetattr("/foo", "bar") = fid2
clunk(fid2)
walkgetattr("/foo/bar", "baz") = fid3
// No clunk: fid3 used for new gofer.dentry
// No getattr: walkgetattr already updated metadata for stat()
- gofer.filesystem.unlinkAt() does not require instantiation of a dentry that
represents the file to be deleted. Updates #898.
- gofer.regularFileFD.OnClose() skips Tflushf for regular files under
InteropModeExclusive, as it's nonsensical to request a remote file flush
without flushing locally-buffered writes to that remote file first.
- Symlink targets are cached when InteropModeShared is not in effect.
- p9.QID.Path (which is already required to be unique for each file within a
server, and is accordingly already synthesized from device/inode numbers in
all known gofers) is used as-is for inode numbers, rather than being mapped
along with attr.RDev in the client to yet another synthetic inode number.
- Relevant parts of fsutil.CachingInodeOperations are inlined directly into
gofer package code. This avoids having to duplicate part of its functionality
in fsutil.HostMappable.
PiperOrigin-RevId: 293190213
2020-02-04 19:28:36 +00:00
|
|
|
|
|
|
|
d.fs.renameMu.RLock()
|
|
|
|
defer d.fs.renameMu.RUnlock()
|
|
|
|
d.dirMu.Lock()
|
|
|
|
defer d.dirMu.Unlock()
|
|
|
|
if d.dirents != nil {
|
|
|
|
return d.dirents, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// It's not clear if 9P2000.L's readdir is expected to return "." and "..",
|
|
|
|
// so we generate them here.
|
|
|
|
parent := d.vfsd.ParentOrSelf().Impl().(*dentry)
|
|
|
|
dirents := []vfs.Dirent{
|
|
|
|
{
|
|
|
|
Name: ".",
|
|
|
|
Type: linux.DT_DIR,
|
|
|
|
Ino: d.ino,
|
|
|
|
NextOff: 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Name: "..",
|
|
|
|
Type: uint8(atomic.LoadUint32(&parent.mode) >> 12),
|
|
|
|
Ino: parent.ino,
|
|
|
|
NextOff: 2,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
off := uint64(0)
|
|
|
|
const count = 64 * 1024 // for consistency with the vfs1 client
|
|
|
|
d.handleMu.RLock()
|
|
|
|
defer d.handleMu.RUnlock()
|
|
|
|
if !d.handleReadable {
|
|
|
|
// This should not be possible because a readable handle should have
|
|
|
|
// been opened when the calling directoryFD was opened.
|
|
|
|
panic("gofer.dentry.getDirents called without a readable handle")
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
p9ds, err := d.handle.file.readdir(ctx, off, count)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if len(p9ds) == 0 {
|
|
|
|
// Cache dirents for future directoryFDs if permitted.
|
|
|
|
if d.fs.opts.interop != InteropModeShared {
|
|
|
|
d.dirents = dirents
|
|
|
|
}
|
|
|
|
return dirents, nil
|
|
|
|
}
|
|
|
|
for _, p9d := range p9ds {
|
|
|
|
if p9d.Name == "." || p9d.Name == ".." {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
dirent := vfs.Dirent{
|
|
|
|
Name: p9d.Name,
|
|
|
|
Ino: p9d.QID.Path,
|
|
|
|
NextOff: int64(len(dirents) + 1),
|
|
|
|
}
|
|
|
|
// p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or
|
|
|
|
// DMSOCKET.
|
|
|
|
switch p9d.Type {
|
|
|
|
case p9.TypeSymlink:
|
|
|
|
dirent.Type = linux.DT_LNK
|
|
|
|
case p9.TypeDir:
|
|
|
|
dirent.Type = linux.DT_DIR
|
|
|
|
default:
|
|
|
|
dirent.Type = linux.DT_REG
|
|
|
|
}
|
|
|
|
dirents = append(dirents, dirent)
|
|
|
|
}
|
|
|
|
off = p9ds[len(p9ds)-1].Offset
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Seek implements vfs.FileDescriptionImpl.Seek.
|
|
|
|
func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
|
|
|
|
fd.mu.Lock()
|
|
|
|
defer fd.mu.Unlock()
|
|
|
|
|
|
|
|
switch whence {
|
|
|
|
case linux.SEEK_SET:
|
|
|
|
if offset < 0 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
if offset == 0 {
|
|
|
|
// Ensure that the next call to fd.IterDirents() calls
|
|
|
|
// fd.dentry().getDirents().
|
|
|
|
fd.dirents = nil
|
|
|
|
}
|
|
|
|
fd.off = offset
|
|
|
|
return fd.off, nil
|
|
|
|
case linux.SEEK_CUR:
|
|
|
|
offset += fd.off
|
|
|
|
if offset < 0 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
// Don't clear fd.dirents in this case, even if offset == 0.
|
|
|
|
fd.off = offset
|
|
|
|
return fd.off, nil
|
|
|
|
default:
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
}
|