ext4: disklayout: inode impl.

PiperOrigin-RevId: 257010414
This commit is contained in:
Ayush Ranjan 2019-07-08 10:43:11 -07:00 committed by gVisor bot
parent 67f2cefce0
commit 8f9b1ca8e7
11 changed files with 504 additions and 40 deletions

View File

@ -8,7 +8,10 @@ go_library(
"block_group.go",
"block_group_32.go",
"block_group_64.go",
"disklayout.go",
"inode.go",
"inode_new.go",
"inode_old.go",
"superblock.go",
"superblock_32.go",
"superblock_64.go",
@ -29,7 +32,9 @@ go_test(
size = "small",
srcs = [
"block_group_test.go",
"inode_test.go",
"superblock_test.go",
],
embed = [":disklayout"],
deps = ["//pkg/sentry/kernel/time"],
)

View File

@ -17,12 +17,6 @@ package disklayout
// BlockGroup32Bit emulates the first half of struct ext4_group_desc in
// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and
// 32-bit ext4 filesystems. It implements BlockGroup interface.
//
// The suffix `Lo` here stands for lower bits because this is also used in the
// 64-bit version where these fields represent the lower half of the fields.
// The suffix `Raw` has been added to indicate that the field does not have a
// counterpart in the 64-bit version and to resolve name collision with the
// interface.
type BlockGroup32Bit struct {
BlockBitmapLo uint32
InodeBitmapLo uint32
@ -38,6 +32,9 @@ type BlockGroup32Bit struct {
ChecksumRaw uint16
}
// Compiles only if BlockGroup32Bit implements BlockGroup.
var _ BlockGroup = (*BlockGroup32Bit)(nil)
// InodeTable implements BlockGroup.InodeTable.
func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) }

View File

@ -18,9 +18,6 @@ package disklayout
// It is the block group descriptor struct for 64-bit ext4 filesystems.
// It implements BlockGroup interface. It is an extension of the 32-bit
// version of BlockGroup.
//
// The suffix `Hi` here stands for upper bits because they represent the upper
// half of the fields.
type BlockGroup64Bit struct {
// We embed the 32-bit struct here because 64-bit version is just an extension
// of the 32-bit version.
@ -40,6 +37,9 @@ type BlockGroup64Bit struct {
_ uint32 // Padding to 64 bytes.
}
// Compiles only if BlockGroup64Bit implements BlockGroup.
var _ BlockGroup = (*BlockGroup64Bit)(nil)
// Methods to override. Checksum() and Flags() are not overridden.
// InodeTable implements BlockGroup.InodeTable.

View File

@ -0,0 +1,50 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package disklayout provides Linux ext file system's disk level structures
// which can be directly read into from the underlying device. Structs aim to
// emulate structures `exactly` how they are laid out on disk.
//
// This library aims to be compatible with all ext(2/3/4) systems so it
// provides a generic interface for all major structures and various
// implementations (for different versions). The user code is responsible for
// using appropriate implementations based on the underlying device.
//
// Interfacing all major structures here serves a few purposes:
// - Abstracts away the complexity of the underlying structure from client
// code. The client only has to figure out versioning on set up and then
// can use these as black boxes and pass it higher up the stack.
// - Having pointer receivers forces the user to use pointers to these
// heavy structs. Hence, prevents the client code from unintentionally
// copying these by value while passing the interface around.
// - Version-based implementation selection is resolved on set up hence
// avoiding per call overhead of choosing implementation.
// - All interface methods are pretty light weight (do not take in any
// parameters by design). Passing pointer arguments to interface methods
// can lead to heap allocation as the compiler won't be able to perform
// escape analysis on an unknown implementation at compile time.
//
// Notes:
// - All fields in these structs are exported because binary.Read would
// panic otherwise.
// - All structures on disk are in little-endian order. Only jbd2 (journal)
// structures are in big-endian order.
// - All OS dependent fields in these structures will be interpreted using
// the Linux version of that field.
// - The suffix `Lo` in field names stands for lower bits of that field.
// - The suffix `Hi` in field names stands for upper bits of that field.
// - The suffix `Raw` has been added to indicate that the field is not split
// into Lo and Hi fields and also to resolve name collision with the
// respective interface.
package disklayout

View File

@ -0,0 +1,96 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package disklayout
import "gvisor.dev/gvisor/pkg/sentry/kernel/time"
// InodeNew represents the ext4 inode structure, which can be bigger than
// oldInodeSize. The actual size of this struct should be determined using
// inode.ExtraInodeSize. Accessing any field here should be verified with the
// actual size. The extra space between the end of the inode struct and end of
// the inode record can be used to store extended attributes.
//
// If the TimeExtra fields are in scope, the lower 2 bits of those are used
// to extend their counterpart to be 34 bits wide; the remaining (upper) 30
// bits are used to provide nanosecond precision. Hence, these timestamps will
// now overflow in May 2446.
// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
//
// The field order below mirrors the on-disk layout and must not be changed.
type InodeNew struct {
// InodeOld is embedded because the ext4 inode begins with the ext2/ext3
// inode; methods that are not overridden below are inherited from it.
InodeOld
// ExtraInodeSize is added to oldInodeSize by InodeSize() and gates which of
// the extra fields below are considered in scope.
ExtraInodeSize uint16
ChecksumHi uint16
// ChangeTimeExtra is only used by ChangeTime() when ExtraInodeSize >= 8.
ChangeTimeExtra uint32
// ModificationTimeExtra is only used by ModificationTime() when
// ExtraInodeSize >= 12.
ModificationTimeExtra uint32
// AccessTimeExtra is only used by AccessTime() when ExtraInodeSize >= 16.
AccessTimeExtra uint32
CreationTime uint32
CreationTimeExtra uint32
VersionHi uint32
ProjectID uint32
}
// Compiles only if InodeNew implements Inode.
var _ Inode = (*InodeNew)(nil)
// fromExtraTime combines a 32-bit raw timestamp with its matching TimeExtra
// field and returns the resulting kernel time with nanosecond precision.
//
// The low 2 bits of extra are epoch bits that extend the seconds counter; the
// remaining upper 30 bits carry the nanoseconds. See the description above
// InodeNew for the format.
func fromExtraTime(lo int32, extra uint32) time.Time {
	const (
		epochBits = 2
		epochMask = 1<<epochBits - 1
	)

	epoch := int64(extra & epochMask)
	nsec := int64(extra >> epochBits)

	// lo is sign-extended when widened, so it has to be added to the shifted
	// epoch rather than OR-ed into it.
	sec := int64(lo) + epoch<<32
	return time.FromUnix(sec, nsec)
}
// Only override methods which change due to ext4 specific fields.
// Size implements Inode.Size.
//
// The 64-bit size is assembled from the two 32-bit halves stored in the
// embedded InodeOld: SizeHi provides the upper 32 bits, SizeLo the lower 32.
func (in *InodeNew) Size() uint64 {
	hi := uint64(in.SizeHi)
	lo := uint64(in.SizeLo)
	return hi<<32 | lo
}
// InodeSize implements Inode.InodeSize.
//
// The result is the fixed ext2/ext3 inode size plus the extra size recorded
// in the inode itself.
func (in *InodeNew) InodeSize() uint16 {
	extra := in.ExtraInodeSize
	return extra + oldInodeSize
}
// ChangeTime implements Inode.ChangeTime.
//
// The extended decoding is applied only when ChangeTimeExtra is in scope;
// otherwise the ext2/ext3 behaviour of the embedded InodeOld is used.
func (in *InodeNew) ChangeTime() time.Time {
	// ChangeTimeExtra is in scope once at least 8 extra bytes are in use.
	if in.ExtraInodeSize < 8 {
		return in.InodeOld.ChangeTime()
	}
	return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra)
}
// ModificationTime implements Inode.ModificationTime.
//
// The extended decoding is applied only when ModificationTimeExtra is in
// scope; otherwise the ext2/ext3 behaviour of the embedded InodeOld is used.
func (in *InodeNew) ModificationTime() time.Time {
	// ModificationTimeExtra is in scope once at least 12 extra bytes are in
	// use.
	if in.ExtraInodeSize < 12 {
		return in.InodeOld.ModificationTime()
	}
	return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra)
}
// AccessTime implements Inode.AccessTime.
//
// The extended decoding is applied only when AccessTimeExtra is in scope;
// otherwise the ext2/ext3 behaviour of the embedded InodeOld is used.
func (in *InodeNew) AccessTime() time.Time {
	// AccessTimeExtra is in scope once at least 16 extra bytes are in use.
	if in.ExtraInodeSize < 16 {
		return in.InodeOld.AccessTime()
	}
	return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra)
}

View File

@ -0,0 +1,117 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package disklayout
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// oldInodeSize is the size in bytes of an inode in ext2/ext3.
const oldInodeSize = 128
// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct.
// Inode struct size and record size are both 128 bytes for this.
//
// All fields representing time are in seconds since the epoch. Which means that
// they will overflow in January 2038.
//
// The field order below mirrors the on-disk layout and must not be changed.
type InodeOld struct {
ModeRaw uint16
// UIDLo is the lower 16 bits of the owner UID; UID() combines it with UIDHi.
UIDLo uint16
// SizeLo is the only size field InodeOld.Size() reads.
SizeLo uint32
// The time fields are signed integers because they could be negative to
// represent time before the epoch.
AccessTimeRaw int32
ChangeTimeRaw int32
ModificationTimeRaw int32
DeletionTimeRaw int32
// GIDLo is the lower 16 bits of the owner GID; GID() combines it with GIDHi.
GIDLo uint16
LinksCountRaw uint16
BlocksCountLo uint32
FlagsRaw uint32
VersionLo uint32 // This is OS dependent.
// BlocksRaw is returned verbatim by Blocks().
BlocksRaw [60]byte
Generation uint32
FileACLLo uint32
// SizeHi is ignored by InodeOld.Size(); in ext2/ext3 this field was named
// DirACL. InodeNew.Size() uses it as the upper 32 bits of the size.
SizeHi uint32
ObsoFaddr uint32
// OS dependent fields have been inlined here.
BlocksCountHi uint16
FileACLHi uint16
UIDHi uint16
GIDHi uint16
ChecksumLo uint16
// Unnamed trailing field; it brings the struct to 128 bytes.
_ uint16
}
// Compiles only if InodeOld implements Inode.
var _ Inode = (*InodeOld)(nil)
// Mode implements Inode.Mode.
//
// The raw 16-bit mode field is converted to linux.FileMode as is.
func (in *InodeOld) Mode() linux.FileMode {
	raw := in.ModeRaw
	return linux.FileMode(raw)
}
// UID implements Inode.UID.
//
// The 32-bit UID is assembled from its two 16-bit halves: UIDHi supplies the
// upper 16 bits and UIDLo the lower 16.
func (in *InodeOld) UID() auth.KUID {
	hi, lo := uint32(in.UIDHi), uint32(in.UIDLo)
	return auth.KUID(hi<<16 | lo)
}
// GID implements Inode.GID.
//
// The 32-bit GID is assembled from its two 16-bit halves: GIDHi supplies the
// upper 16 bits and GIDLo the lower 16.
func (in *InodeOld) GID() auth.KGID {
	hi, lo := uint32(in.GIDHi), uint32(in.GIDLo)
	return auth.KGID(hi<<16 | lo)
}
// Size implements Inode.Size.
//
// Only SizeLo contributes to the result. In ext2/ext3 the field now called
// SizeHi did not exist; it was instead named DirACL, so it is ignored here.
func (in *InodeOld) Size() uint64 {
	lo := in.SizeLo
	return uint64(lo)
}
// InodeSize implements Inode.InodeSize.
//
// For ext2/ext3 inodes the size is always oldInodeSize.
func (in *InodeOld) InodeSize() uint16 {
	const size uint16 = oldInodeSize
	return size
}
// AccessTime implements Inode.AccessTime.
//
// The raw value is taken as whole seconds since the epoch, with no
// nanosecond component.
func (in *InodeOld) AccessTime() time.Time {
	secs := int64(in.AccessTimeRaw)
	return time.FromUnix(secs, 0)
}
// ChangeTime implements Inode.ChangeTime.
//
// The raw value is taken as whole seconds since the epoch, with no
// nanosecond component.
func (in *InodeOld) ChangeTime() time.Time {
	secs := int64(in.ChangeTimeRaw)
	return time.FromUnix(secs, 0)
}
// ModificationTime implements Inode.ModificationTime.
//
// The raw value is taken as whole seconds since the epoch, with no
// nanosecond component.
func (in *InodeOld) ModificationTime() time.Time {
	secs := int64(in.ModificationTimeRaw)
	return time.FromUnix(secs, 0)
}
// DeletionTime implements Inode.DeletionTime.
//
// The raw value is taken as whole seconds since the epoch, with no
// nanosecond component.
func (in *InodeOld) DeletionTime() time.Time {
	secs := int64(in.DeletionTimeRaw)
	return time.FromUnix(secs, 0)
}
// LinksCount implements Inode.LinksCount.
//
// The stored link count is returned unchanged.
func (in *InodeOld) LinksCount() uint16 {
	count := in.LinksCountRaw
	return count
}
// Flags implements Inode.Flags.
//
// The raw 32-bit flags word is decoded with InodeFlagsFromInt.
func (in *InodeOld) Flags() InodeFlags {
	raw := in.FlagsRaw
	return InodeFlagsFromInt(raw)
}
// Blocks implements Inode.Blocks.
//
// The 60-byte block area is returned by value, so the caller gets a copy.
func (in *InodeOld) Blocks() [60]byte {
	blocks := in.BlocksRaw
	return blocks
}

View File

@ -0,0 +1,222 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package disklayout
import (
"fmt"
"strconv"
"testing"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// TestInodeSize checks that the inode structs have the expected sizes.
func TestInodeSize(t *testing.T) {
	// The full InodeNew size was updated from 156 bytes to 160 bytes in
	// Oct 2015.
	const newInodeSize = 160

	assertSize(t, InodeOld{}, oldInodeSize)
	assertSize(t, InodeNew{}, newInodeSize)
}
// TestTimestampSeconds checks that the seconds part of the [a/c/m] timestamps
// in ext4 inode structs is decoded correctly by fromExtraTime, for every
// combination of epoch bits and sign of the 32-bit raw time.
//
// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
func TestTimestampSeconds(t *testing.T) {
	type timestampTest struct {
		// msbSet reports whether the most significant bit of
		// InodeOld.[X]TimeRaw is set, in which case the 32-bit time is
		// negative.
		msbSet bool

		// lowerBound selects the lowest possible InodeOld.[X]TimeRaw value
		// that satisfies msbSet when true, and the highest such value when
		// false.
		lowerBound bool

		// extraBits is InodeNew.[X]TimeExtra.
		extraBits uint32

		// want is the kernel time struct that is expected.
		want time.Time
	}

	// secs builds the expected kernel time for a whole number of seconds.
	secs := func(s int64) time.Time { return time.FromUnix(s, 0) }

	tests := []timestampTest{
		// 1901-12-13
		{msbSet: true, lowerBound: true, extraBits: 0, want: secs(-0x80000000)},
		// 1969-12-31
		{msbSet: true, lowerBound: false, extraBits: 0, want: secs(-1)},
		// 1970-01-01
		{msbSet: false, lowerBound: true, extraBits: 0, want: secs(0)},
		// 2038-01-19
		{msbSet: false, lowerBound: false, extraBits: 0, want: secs(0x7fffffff)},
		// 2038-01-19
		{msbSet: true, lowerBound: true, extraBits: 1, want: secs(0x80000000)},
		// 2106-02-07
		{msbSet: true, lowerBound: false, extraBits: 1, want: secs(0xffffffff)},
		// 2106-02-07
		{msbSet: false, lowerBound: true, extraBits: 1, want: secs(0x100000000)},
		// 2174-02-25
		{msbSet: false, lowerBound: false, extraBits: 1, want: secs(0x17fffffff)},
		// 2174-02-25
		{msbSet: true, lowerBound: true, extraBits: 2, want: secs(0x180000000)},
		// 2242-03-16
		{msbSet: true, lowerBound: false, extraBits: 2, want: secs(0x1ffffffff)},
		// 2242-03-16
		{msbSet: false, lowerBound: true, extraBits: 2, want: secs(0x200000000)},
		// 2310-04-04
		{msbSet: false, lowerBound: false, extraBits: 2, want: secs(0x27fffffff)},
		// 2310-04-04
		{msbSet: true, lowerBound: true, extraBits: 3, want: secs(0x280000000)},
		// 2378-04-22
		{msbSet: true, lowerBound: false, extraBits: 3, want: secs(0x2ffffffff)},
		// 2378-04-22
		{msbSet: false, lowerBound: true, extraBits: 3, want: secs(0x300000000)},
		// 2446-05-10
		{msbSet: false, lowerBound: false, extraBits: 3, want: secs(0x37fffffff)},
	}

	// rawTime picks the 32-bit raw timestamp described by a test case.
	rawTime := func(tc timestampTest) int32 {
		switch {
		case tc.msbSet && tc.lowerBound:
			// binary: 10000000 00000000 00000000 00000000
			return -0x80000000
		case tc.msbSet:
			// binary: 11111111 11111111 11111111 11111111
			return -1
		case tc.lowerBound:
			// binary: 00000000 00000000 00000000 00000000
			return 0
		default:
			// binary: 01111111 11111111 11111111 11111111
			return 0x7fffffff
		}
	}

	for _, test := range tests {
		name := fmt.Sprintf(
			"Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t",
			strconv.FormatInt(int64(test.extraBits), 2),
			test.msbSet,
			test.lowerBound,
		)
		lo, extra, want := rawTime(test), test.extraBits, test.want

		t.Run(name, func(t *testing.T) {
			got := fromExtraTime(lo, extra)
			if got != want {
				t.Errorf("Expected: %v, Got: %v", want, got)
			}
		})
	}
}

View File

@ -12,22 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Package disklayout provides Linux ext file system's disk level structures
// which can be directly read into from the underlying device. All structures
// on disk are in little-endian order. Only jbd2 (journal) structures are in
// big-endian order. Structs aim to emulate structures `exactly` how they are
// layed out on disk.
//
// This library aims to be compatible with all ext(2/3/4) systems so it
// provides a generic interface for all major structures and various
// implementations (for different versions). The user code is responsible for
// using appropriate implementations based on the underlying device.
//
// Notes:
// - All fields in these structs are exported because binary.Read would
// panic otherwise.
// - All OS dependent fields in these structures will be interpretted using
// the Linux version of that field.
package disklayout
// SuperBlock should be implemented by structs representing the ext superblock.
@ -109,7 +93,7 @@ type SuperBlock interface {
// - Inode disk record size = sb.s_inode_size (function return value).
// = 256 (default)
// - Inode struct size = 128 + inode.i_extra_isize.
// = 128 + 28 = 156 (default)
// = 128 + 32 = 160 (default)
InodeSize() uint16
// InodesPerGroup returns the number of inodes in a block group.

View File

@ -16,10 +16,6 @@ package disklayout
// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of
// the ext4_super_block struct in fs/ext4/ext4.h.
//
// The suffix `Raw` has been added to indicate that the field does not have a
// counterpart in the 64-bit version and to resolve name collision with the
// interface.
type SuperBlock32Bit struct {
// We embed the old superblock struct here because the 32-bit version is just
// an extension of the old version.
@ -52,6 +48,9 @@ type SuperBlock32Bit struct {
JnlBlocks [17]uint32
}
// Compiles only if SuperBlock32Bit implements SuperBlock.
var _ SuperBlock = (*SuperBlock32Bit)(nil)
// Only override methods which change based on the additional fields above.
// Not overriding SuperBlock.BgDescSize because it would still return 32 here.

View File

@ -18,9 +18,6 @@ package disklayout
// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly
// 1024 bytes (smallest possible block size) and hence the superblock always
// fits in no more than one data block.
//
// The suffix `Hi` here stands for upper bits because they represent the upper
// half of the fields.
type SuperBlock64Bit struct {
// We embed the 32-bit struct here because 64-bit version is just an extension
// of the 32-bit version.
@ -78,6 +75,9 @@ type SuperBlock64Bit struct {
Checksum uint32
}
// Compiles only if SuperBlock64Bit implements SuperBlock.
var _ SuperBlock = (*SuperBlock64Bit)(nil)
// Only override methods which change based on the 64-bit feature.
// BlocksCount implements SuperBlock.BlocksCount.

View File

@ -16,12 +16,6 @@ package disklayout
// SuperBlockOld implements SuperBlock and represents the old version of the
// superblock struct in ext2 and ext3 systems.
//
// The suffix `Lo` here stands for lower bits because this is also used in the
// 64-bit version where these fields represent the lower half of the fields.
// The suffix `Raw` has been added to indicate that the field does not have a
// counterpart in the 64-bit version and to resolve name collision with the
// interface.
type SuperBlockOld struct {
InodesCountRaw uint32
BlocksCountLo uint32
@ -84,7 +78,7 @@ func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterS
func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw }
// InodeSize implements SuperBlock.InodeSize.
func (sb *SuperBlockOld) InodeSize() uint16 { return 128 }
func (sb *SuperBlockOld) InodeSize() uint16 { return oldInodeSize }
// InodesPerGroup implements SuperBlock.InodesPerGroup.
func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw }