gvisor/pkg/sentry/fsimpl/ext/block_map_file.go

201 lines
6.2 KiB
Go
Raw Normal View History

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ext
import (
"io"
"math"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/syserror"
)
const (
// numDirectBlks is the number of direct blocks in ext block map inodes.
numDirectBlks = 12
)
// blockMapFile is a type of regular file which uses direct/indirect block
// addressing to store file data. This was deprecated in ext4.
type blockMapFile struct {
regFile regularFile
// directBlks are the direct blocks numbers. The physical blocks pointed by
// these holds file data. Contains file blocks 0 to 11.
directBlks [numDirectBlks]uint32
// indirectBlk is the physical block which contains (blkSize/4) direct block
// numbers (as uint32 integers).
indirectBlk uint32
// doubleIndirectBlk is the physical block which contains (blkSize/4) indirect
// block numbers (as uint32 integers).
doubleIndirectBlk uint32
// tripleIndirectBlk is the physical block which contains (blkSize/4) doubly
// indirect block numbers (as uint32 integers).
tripleIndirectBlk uint32
// coverage at (i)th index indicates the amount of file data a node at
// height (i) covers. Height 0 is the direct block.
coverage [4]uint64
}
// Compiles only if blockMapFile implements io.ReaderAt.
var _ io.ReaderAt = (*blockMapFile)(nil)
// newBlockMapFile is the blockMapFile constructor. It initializes the file to
// physical blocks map with (at most) the first 12 (direct) blocks.
func newBlockMapFile(regFile regularFile) (*blockMapFile, error) {
file := &blockMapFile{regFile: regFile}
file.regFile.impl = file
for i := uint(0); i < 4; i++ {
file.coverage[i] = getCoverage(regFile.inode.blkSize, i)
}
blkMap := regFile.inode.diskInode.Data()
binary.Unmarshal(blkMap[:numDirectBlks*4], binary.LittleEndian, &file.directBlks)
binary.Unmarshal(blkMap[numDirectBlks*4:(numDirectBlks+1)*4], binary.LittleEndian, &file.indirectBlk)
binary.Unmarshal(blkMap[(numDirectBlks+1)*4:(numDirectBlks+2)*4], binary.LittleEndian, &file.doubleIndirectBlk)
binary.Unmarshal(blkMap[(numDirectBlks+2)*4:(numDirectBlks+3)*4], binary.LittleEndian, &file.tripleIndirectBlk)
return file, nil
}
// ReadAt implements io.ReaderAt.ReadAt.
func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) {
if len(dst) == 0 {
return 0, nil
}
if off < 0 {
return 0, syserror.EINVAL
}
offset := uint64(off)
size := f.regFile.inode.diskInode.Size()
if offset >= size {
return 0, io.EOF
}
// dirBlksEnd is the file offset until which direct blocks cover file data.
// Direct blocks cover 0 <= file offset < dirBlksEnd.
dirBlksEnd := numDirectBlks * f.coverage[0]
// indirBlkEnd is the file offset until which the indirect block covers file
// data. The indirect block covers dirBlksEnd <= file offset < indirBlkEnd.
indirBlkEnd := dirBlksEnd + f.coverage[1]
// doubIndirBlkEnd is the file offset until which the double indirect block
// covers file data. The double indirect block covers the range
// indirBlkEnd <= file offset < doubIndirBlkEnd.
doubIndirBlkEnd := indirBlkEnd + f.coverage[2]
read := 0
toRead := len(dst)
if uint64(toRead)+offset > size {
toRead = int(size - offset)
}
for read < toRead {
var err error
var curR int
// Figure out which block to delegate the read to.
switch {
case offset < dirBlksEnd:
// Direct block.
curR, err = f.read(f.directBlks[offset/f.regFile.inode.blkSize], offset%f.regFile.inode.blkSize, 0, dst[read:])
case offset < indirBlkEnd:
// Indirect block.
curR, err = f.read(f.indirectBlk, offset-dirBlksEnd, 1, dst[read:])
case offset < doubIndirBlkEnd:
// Doubly indirect block.
curR, err = f.read(f.doubleIndirectBlk, offset-indirBlkEnd, 2, dst[read:])
default:
// Triply indirect block.
curR, err = f.read(f.tripleIndirectBlk, offset-doubIndirBlkEnd, 3, dst[read:])
}
read += curR
offset += uint64(curR)
if err != nil {
return read, err
}
}
if read < len(dst) {
return read, io.EOF
}
return read, nil
}
// read is the recursive step of the ReadAt function. It relies on knowing the
// current node's location on disk (curPhyBlk) and its height in the block map
// tree. A height of 0 shows that the current node is actually holding file
// data. relFileOff tells the offset from which we need to start to reading
// under the current node. It is completely relative to the current node.
func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, dst []byte) (int, error) {
curPhyBlkOff := int64(curPhyBlk) * int64(f.regFile.inode.blkSize)
if height == 0 {
toRead := int(f.regFile.inode.blkSize - relFileOff)
if len(dst) < toRead {
toRead = len(dst)
}
Minor VFS2 interface changes. - Remove the Filesystem argument from DentryImpl.*Ref(); in general DentryImpls that need the Filesystem for reference counting will probably also need it for other interface methods that don't plumb Filesystem, so it's easier to just store a pointer to the filesystem in the DentryImpl. - Add a pointer to the VirtualFilesystem to Filesystem, which is needed by the gofer client to disown dentries for cache eviction triggered by dentry reference count changes. - Rename FilesystemType.NewFilesystem to GetFilesystem; in some cases (e.g. sysfs, cgroupfs) it's much cleaner for there to be only one Filesystem that is used by all mounts, and in at least one case (devtmpfs) it's visibly incorrect not to do so, so NewFilesystem doesn't always actually create and return a *new* Filesystem. - Require callers of FileDescription.Init() to increment Mount/Dentry references. This is because the gofer client may, in the OpenAt() path, take a reference on a dentry with 0 references, which is safe due to synchronization that is outside the scope of this CL, and it would be safer to still have its implementation of DentryImpl.IncRef() check for an increment for 0 references in other cases. - Add FileDescription.TryIncRef. This is used by the gofer client to take references on "special file descriptions" (FDs for files such as pipes, sockets, and devices), which use per-FD handles (fids) instead of dentry-shared handles, for sync() and syncfs(). PiperOrigin-RevId: 282473364
2019-11-26 02:09:15 +00:00
n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff))
if n < toRead {
return n, syserror.EIO
}
return n, nil
}
childCov := f.coverage[height-1]
startIdx := relFileOff / childCov
endIdx := f.regFile.inode.blkSize / 4 // This is exclusive.
wantEndIdx := (relFileOff + uint64(len(dst))) / childCov
wantEndIdx++ // Make this exclusive.
if wantEndIdx < endIdx {
endIdx = wantEndIdx
}
read := 0
curChildOff := relFileOff % childCov
for i := startIdx; i < endIdx; i++ {
var childPhyBlk uint32
Minor VFS2 interface changes. - Remove the Filesystem argument from DentryImpl.*Ref(); in general DentryImpls that need the Filesystem for reference counting will probably also need it for other interface methods that don't plumb Filesystem, so it's easier to just store a pointer to the filesystem in the DentryImpl. - Add a pointer to the VirtualFilesystem to Filesystem, which is needed by the gofer client to disown dentries for cache eviction triggered by dentry reference count changes. - Rename FilesystemType.NewFilesystem to GetFilesystem; in some cases (e.g. sysfs, cgroupfs) it's much cleaner for there to be only one Filesystem that is used by all mounts, and in at least one case (devtmpfs) it's visibly incorrect not to do so, so NewFilesystem doesn't always actually create and return a *new* Filesystem. - Require callers of FileDescription.Init() to increment Mount/Dentry references. This is because the gofer client may, in the OpenAt() path, take a reference on a dentry with 0 references, which is safe due to synchronization that is outside the scope of this CL, and it would be safer to still have its implementation of DentryImpl.IncRef() check for an increment for 0 references in other cases. - Add FileDescription.TryIncRef. This is used by the gofer client to take references on "special file descriptions" (FDs for files such as pipes, sockets, and devices), which use per-FD handles (fids) instead of dentry-shared handles, for sync() and syncfs(). PiperOrigin-RevId: 282473364
2019-11-26 02:09:15 +00:00
err := readFromDisk(f.regFile.inode.fs.dev, curPhyBlkOff+int64(i*4), &childPhyBlk)
if err != nil {
return read, err
}
n, err := f.read(childPhyBlk, curChildOff, height-1, dst[read:])
read += n
if err != nil {
return read, err
}
curChildOff = 0
}
return read, nil
}
// getCoverage returns the number of bytes a node at the given height covers.
// Height 0 is the file data block itself. Height 1 is the indirect block.
//
// Formula: blkSize * ((blkSize / 4)^height)
func getCoverage(blkSize uint64, height uint) uint64 {
return blkSize * uint64(math.Pow(float64(blkSize/4), float64(height)))
}