Use FD limit and file size limit from host
The FD limit and file size limit are read from the host, instead of using hard-coded defaults, given that they affect the sandbox process. Also limit the dirent cache to use no more than half of the available FDs. PiperOrigin-RevId: 244050323 Change-Id: I787ad0fdf07c49d589e51aebfeae477324fe26e6
This commit is contained in:
parent
08d99c5fbe
commit
c8cee7108f
|
@ -12,6 +12,7 @@ go_library(
|
||||||
"dentry.go",
|
"dentry.go",
|
||||||
"dirent.go",
|
"dirent.go",
|
||||||
"dirent_cache.go",
|
"dirent_cache.go",
|
||||||
|
"dirent_cache_limiter.go",
|
||||||
"dirent_list.go",
|
"dirent_list.go",
|
||||||
"dirent_state.go",
|
"dirent_state.go",
|
||||||
"event_list.go",
|
"event_list.go",
|
||||||
|
|
|
@ -26,6 +26,9 @@ type contextID int
|
||||||
const (
|
const (
|
||||||
// CtxRoot is a Context.Value key for a Dirent.
|
// CtxRoot is a Context.Value key for a Dirent.
|
||||||
CtxRoot contextID = iota
|
CtxRoot contextID = iota
|
||||||
|
|
||||||
|
// CtxDirentCacheLimiter is a Context.Value key for DirentCacheLimiter.
|
||||||
|
CtxDirentCacheLimiter
|
||||||
)
|
)
|
||||||
|
|
||||||
// ContextCanAccessFile determines whether `file` can be accessed in the requested way
|
// ContextCanAccessFile determines whether `file` can be accessed in the requested way
|
||||||
|
@ -100,3 +103,12 @@ func RootFromContext(ctx context.Context) *Dirent {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DirentCacheLimiterFromContext returns the DirentCacheLimiter used by ctx, or
|
||||||
|
// nil if ctx does not have a dirent cache limiter.
|
||||||
|
func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter {
|
||||||
|
if v := ctx.Value(CtxDirentCacheLimiter); v != nil {
|
||||||
|
return v.(*DirentCacheLimiter)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -32,6 +32,10 @@ type DirentCache struct {
|
||||||
// when cache is nil.
|
// when cache is nil.
|
||||||
maxSize uint64
|
maxSize uint64
|
||||||
|
|
||||||
|
// limit restricts the number of entries in the cache amoung multiple caches.
|
||||||
|
// It may be nil if there are no global limit for this cache.
|
||||||
|
limit *DirentCacheLimiter
|
||||||
|
|
||||||
// mu protects currentSize and direntList.
|
// mu protects currentSize and direntList.
|
||||||
mu sync.Mutex `state:"nosave"`
|
mu sync.Mutex `state:"nosave"`
|
||||||
|
|
||||||
|
@ -45,8 +49,7 @@ type DirentCache struct {
|
||||||
list direntList `state:"zerovalue"`
|
list direntList `state:"zerovalue"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDirentCache returns a new DirentCache with the given maxSize. If maxSize
|
// NewDirentCache returns a new DirentCache with the given maxSize.
|
||||||
// is 0, nil is returned.
|
|
||||||
func NewDirentCache(maxSize uint64) *DirentCache {
|
func NewDirentCache(maxSize uint64) *DirentCache {
|
||||||
return &DirentCache{
|
return &DirentCache{
|
||||||
maxSize: maxSize,
|
maxSize: maxSize,
|
||||||
|
@ -71,15 +74,24 @@ func (c *DirentCache) Add(d *Dirent) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First check against the global limit.
|
||||||
|
for c.limit != nil && !c.limit.tryInc() {
|
||||||
|
if c.currentSize == 0 {
|
||||||
|
// If the global limit is reached, but there is nothing more to drop from
|
||||||
|
// this cache, there is not much else to do.
|
||||||
|
c.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.remove(c.list.Back())
|
||||||
|
}
|
||||||
|
|
||||||
// d is not in cache. Add it and take a reference.
|
// d is not in cache. Add it and take a reference.
|
||||||
c.list.PushFront(d)
|
c.list.PushFront(d)
|
||||||
d.IncRef()
|
d.IncRef()
|
||||||
c.currentSize++
|
c.currentSize++
|
||||||
|
|
||||||
// Remove the oldest until we are under the size limit.
|
c.maybeShrink()
|
||||||
for c.maxSize > 0 && c.currentSize > c.maxSize {
|
|
||||||
c.remove(c.list.Back())
|
|
||||||
}
|
|
||||||
c.mu.Unlock()
|
c.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,6 +104,9 @@ func (c *DirentCache) remove(d *Dirent) {
|
||||||
d.SetNext(nil)
|
d.SetNext(nil)
|
||||||
d.DecRef()
|
d.DecRef()
|
||||||
c.currentSize--
|
c.currentSize--
|
||||||
|
if c.limit != nil {
|
||||||
|
c.limit.dec()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove removes the element from the cache and decrements its refCount. It
|
// Remove removes the element from the cache and decrements its refCount. It
|
||||||
|
@ -142,3 +157,19 @@ func (c *DirentCache) Invalidate() {
|
||||||
}
|
}
|
||||||
c.mu.Unlock()
|
c.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setMaxSize sets cache max size. If current size is larger than max size, the
|
||||||
|
// cache shrinks to acommodate the new max.
|
||||||
|
func (c *DirentCache) setMaxSize(max uint64) {
|
||||||
|
c.mu.Lock()
|
||||||
|
c.maxSize = max
|
||||||
|
c.maybeShrink()
|
||||||
|
c.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// shrink removes the oldest element until the list is under the size limit.
|
||||||
|
func (c *DirentCache) maybeShrink() {
|
||||||
|
for c.maxSize > 0 && c.currentSize > c.maxSize {
|
||||||
|
c.remove(c.list.Back())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
// Copyright 2018 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DirentCacheLimiter acts as a global limit for all dirent caches in the
|
||||||
|
// process.
|
||||||
|
//
|
||||||
|
// +stateify savable
|
||||||
|
type DirentCacheLimiter struct {
|
||||||
|
mu sync.Mutex `state:"nosave"`
|
||||||
|
max uint64
|
||||||
|
count uint64 `state:"zerovalue"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDirentCacheLimiter creates a new DirentCacheLimiter.
|
||||||
|
func NewDirentCacheLimiter(max uint64) *DirentCacheLimiter {
|
||||||
|
return &DirentCacheLimiter{max: max}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *DirentCacheLimiter) tryInc() bool {
|
||||||
|
d.mu.Lock()
|
||||||
|
if d.count >= d.max {
|
||||||
|
d.mu.Unlock()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
d.count++
|
||||||
|
d.mu.Unlock()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *DirentCacheLimiter) dec() {
|
||||||
|
d.mu.Lock()
|
||||||
|
if d.count == 0 {
|
||||||
|
panic(fmt.Sprintf("underflowing DirentCacheLimiter count: %+v", d))
|
||||||
|
}
|
||||||
|
d.count--
|
||||||
|
d.mu.Unlock()
|
||||||
|
}
|
|
@ -120,6 +120,96 @@ func TestDirentCache(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDirentCacheLimiter(t *testing.T) {
|
||||||
|
const (
|
||||||
|
globalMaxSize = 5
|
||||||
|
maxSize = 3
|
||||||
|
)
|
||||||
|
|
||||||
|
limit := NewDirentCacheLimiter(globalMaxSize)
|
||||||
|
c1 := NewDirentCache(maxSize)
|
||||||
|
c1.limit = limit
|
||||||
|
c2 := NewDirentCache(maxSize)
|
||||||
|
c2.limit = limit
|
||||||
|
|
||||||
|
// Create a Dirent d.
|
||||||
|
d := NewNegativeDirent("")
|
||||||
|
|
||||||
|
// Add d to the cache.
|
||||||
|
c1.Add(d)
|
||||||
|
if got, want := c1.Size(), uint64(1); got != want {
|
||||||
|
t.Errorf("c1.Size() got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add maxSize-1 more elements. d should be oldest element.
|
||||||
|
for i := 0; i < maxSize-1; i++ {
|
||||||
|
c1.Add(NewNegativeDirent(""))
|
||||||
|
}
|
||||||
|
if got, want := c1.Size(), uint64(maxSize); got != want {
|
||||||
|
t.Errorf("c1.Size() got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that d is still there.
|
||||||
|
if got, want := c1.contains(d), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill up the other cache, it will start dropping old entries from the cache
|
||||||
|
// when the global limit is reached.
|
||||||
|
for i := 0; i < maxSize; i++ {
|
||||||
|
c2.Add(NewNegativeDirent(""))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check is what's remaining from global max.
|
||||||
|
if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want {
|
||||||
|
t.Errorf("c2.Size() got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that d was not dropped.
|
||||||
|
if got, want := c1.contains(d), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add an entry that will eventually be dropped. Check is done later...
|
||||||
|
drop := NewNegativeDirent("")
|
||||||
|
c1.Add(drop)
|
||||||
|
|
||||||
|
// Check that d is bumped to front even when global limit is reached.
|
||||||
|
c1.Add(d)
|
||||||
|
if got, want := c1.contains(d), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add 2 more element and check that:
|
||||||
|
// - d is still in the list: to verify that d was bumped
|
||||||
|
// - d2/d3 are in the list: older entries are dropped when global limit is
|
||||||
|
// reached.
|
||||||
|
// - drop is not in the list: indeed older elements are dropped.
|
||||||
|
d2 := NewNegativeDirent("")
|
||||||
|
c1.Add(d2)
|
||||||
|
d3 := NewNegativeDirent("")
|
||||||
|
c1.Add(d3)
|
||||||
|
if got, want := c1.contains(d), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
if got, want := c1.contains(d2), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d2) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
if got, want := c1.contains(d3), true; got != want {
|
||||||
|
t.Errorf("c1.contains(d3) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
if got, want := c1.contains(drop), false; got != want {
|
||||||
|
t.Errorf("c1.contains(drop) got %v want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop all entries from one cache. The other will be allowed to grow.
|
||||||
|
c1.Invalidate()
|
||||||
|
c2.Add(NewNegativeDirent(""))
|
||||||
|
if got, want := c2.Size(), uint64(maxSize); got != want {
|
||||||
|
t.Errorf("c2.Size() got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestNilDirentCache tests that a nil cache supports all cache operations, but
|
// TestNilDirentCache tests that a nil cache supports all cache operations, but
|
||||||
// treats them as noop.
|
// treats them as noop.
|
||||||
func TestNilDirentCache(t *testing.T) {
|
func TestNilDirentCache(t *testing.T) {
|
||||||
|
|
|
@ -28,6 +28,10 @@ import (
|
||||||
"gvisor.googlesource.com/gvisor/pkg/unet"
|
"gvisor.googlesource.com/gvisor/pkg/unet"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can
|
||||||
|
// be adjusted independentely from the other dirent caches.
|
||||||
|
var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize
|
||||||
|
|
||||||
// +stateify savable
|
// +stateify savable
|
||||||
type endpointMaps struct {
|
type endpointMaps struct {
|
||||||
// mu protexts the direntMap, the keyMap, and the pathMap below.
|
// mu protexts the direntMap, the keyMap, and the pathMap below.
|
||||||
|
@ -249,6 +253,11 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
|
||||||
// Construct the MountSource with the session and superBlockFlags.
|
// Construct the MountSource with the session and superBlockFlags.
|
||||||
m := fs.NewMountSource(s, filesystem, superBlockFlags)
|
m := fs.NewMountSource(s, filesystem, superBlockFlags)
|
||||||
|
|
||||||
|
// Given that gofer files can consume host FDs, restrict the number
|
||||||
|
// of files that can be held by the cache.
|
||||||
|
m.SetDirentCacheMaxSize(DefaultDirentCacheSize)
|
||||||
|
m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx))
|
||||||
|
|
||||||
// Send the Tversion request.
|
// Send the Tversion request.
|
||||||
s.client, err = p9.NewClient(conn, s.msize, s.version)
|
s.client, err = p9.NewClient(conn, s.msize, s.version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -151,9 +151,9 @@ type MountSource struct {
|
||||||
children map[*MountSource]struct{}
|
children map[*MountSource]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// defaultDirentCacheSize is the number of Dirents that the VFS can hold an extra
|
// DefaultDirentCacheSize is the number of Dirents that the VFS can hold an
|
||||||
// reference on.
|
// extra reference on.
|
||||||
const defaultDirentCacheSize uint64 = 1000
|
const DefaultDirentCacheSize uint64 = 1000
|
||||||
|
|
||||||
// NewMountSource returns a new MountSource. Filesystem may be nil if there is no
|
// NewMountSource returns a new MountSource. Filesystem may be nil if there is no
|
||||||
// filesystem backing the mount.
|
// filesystem backing the mount.
|
||||||
|
@ -162,7 +162,7 @@ func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags Mou
|
||||||
MountSourceOperations: mops,
|
MountSourceOperations: mops,
|
||||||
Flags: flags,
|
Flags: flags,
|
||||||
Filesystem: filesystem,
|
Filesystem: filesystem,
|
||||||
fscache: NewDirentCache(defaultDirentCacheSize),
|
fscache: NewDirentCache(DefaultDirentCacheSize),
|
||||||
children: make(map[*MountSource]struct{}),
|
children: make(map[*MountSource]struct{}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -246,6 +246,18 @@ func (msrc *MountSource) FlushDirentRefs() {
|
||||||
msrc.fscache.Invalidate()
|
msrc.fscache.Invalidate()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetDirentCacheMaxSize sets the max size to the dirent cache associated with
|
||||||
|
// this mount source.
|
||||||
|
func (msrc *MountSource) SetDirentCacheMaxSize(max uint64) {
|
||||||
|
msrc.fscache.setMaxSize(max)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetDirentCacheLimiter sets the limiter objcet to the dirent cache associated
|
||||||
|
// with this mount source.
|
||||||
|
func (msrc *MountSource) SetDirentCacheLimiter(l *DirentCacheLimiter) {
|
||||||
|
msrc.fscache.limit = l
|
||||||
|
}
|
||||||
|
|
||||||
// NewCachingMountSource returns a generic mount that will cache dirents
|
// NewCachingMountSource returns a generic mount that will cache dirents
|
||||||
// aggressively.
|
// aggressively.
|
||||||
func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
|
func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
|
||||||
|
|
|
@ -31,10 +31,19 @@ type overlayMountSourceOperations struct {
|
||||||
func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource {
|
func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource {
|
||||||
upper.IncRef()
|
upper.IncRef()
|
||||||
lower.IncRef()
|
lower.IncRef()
|
||||||
return NewMountSource(&overlayMountSourceOperations{
|
msrc := NewMountSource(&overlayMountSourceOperations{
|
||||||
upper: upper,
|
upper: upper,
|
||||||
lower: lower,
|
lower: lower,
|
||||||
}, &overlayFilesystem{}, flags)
|
}, &overlayFilesystem{}, flags)
|
||||||
|
|
||||||
|
// Use the minimum number to keep resource usage under limits.
|
||||||
|
size := lower.fscache.maxSize
|
||||||
|
if size > upper.fscache.maxSize {
|
||||||
|
size = upper.fscache.maxSize
|
||||||
|
}
|
||||||
|
msrc.fscache.setMaxSize(size)
|
||||||
|
|
||||||
|
return msrc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Revalidate implements MountSourceOperations.Revalidate for an overlay by
|
// Revalidate implements MountSourceOperations.Revalidate for an overlay by
|
||||||
|
|
|
@ -188,6 +188,11 @@ type Kernel struct {
|
||||||
|
|
||||||
// deviceRegistry is used to save/restore device.SimpleDevices.
|
// deviceRegistry is used to save/restore device.SimpleDevices.
|
||||||
deviceRegistry struct{} `state:".(*device.Registry)"`
|
deviceRegistry struct{} `state:".(*device.Registry)"`
|
||||||
|
|
||||||
|
// DirentCacheLimiter controls the number of total dirent entries can be in
|
||||||
|
// caches. Not all caches use it, only the caches that use host resources use
|
||||||
|
// the limiter. It may be nil if disabled.
|
||||||
|
DirentCacheLimiter *fs.DirentCacheLimiter
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitKernelArgs holds arguments to Init.
|
// InitKernelArgs holds arguments to Init.
|
||||||
|
@ -626,6 +631,8 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
|
||||||
return ctx.k.mounts.Root()
|
return ctx.k.mounts.Root()
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
case fs.CtxDirentCacheLimiter:
|
||||||
|
return ctx.k.DirentCacheLimiter
|
||||||
case ktime.CtxRealtimeClock:
|
case ktime.CtxRealtimeClock:
|
||||||
return ctx.k.RealtimeClock()
|
return ctx.k.RealtimeClock()
|
||||||
case limits.CtxLimits:
|
case limits.CtxLimits:
|
||||||
|
@ -1170,6 +1177,8 @@ func (ctx supervisorContext) Value(key interface{}) interface{} {
|
||||||
return auth.NewRootCredentials(ctx.k.rootUserNamespace)
|
return auth.NewRootCredentials(ctx.k.rootUserNamespace)
|
||||||
case fs.CtxRoot:
|
case fs.CtxRoot:
|
||||||
return ctx.k.mounts.Root()
|
return ctx.k.mounts.Root()
|
||||||
|
case fs.CtxDirentCacheLimiter:
|
||||||
|
return ctx.k.DirentCacheLimiter
|
||||||
case ktime.CtxRealtimeClock:
|
case ktime.CtxRealtimeClock:
|
||||||
return ctx.k.RealtimeClock()
|
return ctx.k.RealtimeClock()
|
||||||
case limits.CtxLimits:
|
case limits.CtxLimits:
|
||||||
|
|
|
@ -601,6 +601,8 @@ func (t *Task) Value(key interface{}) interface{} {
|
||||||
return int32(t.ThreadGroup().ID())
|
return int32(t.ThreadGroup().ID())
|
||||||
case fs.CtxRoot:
|
case fs.CtxRoot:
|
||||||
return t.fsc.RootDirectory()
|
return t.fsc.RootDirectory()
|
||||||
|
case fs.CtxDirentCacheLimiter:
|
||||||
|
return t.k.DirentCacheLimiter
|
||||||
case inet.CtxStack:
|
case inet.CtxStack:
|
||||||
return t.NetworkContext()
|
return t.NetworkContext()
|
||||||
case ktime.CtxRealtimeClock:
|
case ktime.CtxRealtimeClock:
|
||||||
|
|
|
@ -20,10 +20,10 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
// Include filesystem types that OCI spec might mount.
|
// Include filesystem types that OCI spec might mount.
|
||||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev"
|
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev"
|
||||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
|
|
||||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
|
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
|
||||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc"
|
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc"
|
||||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
|
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
|
||||||
|
@ -38,6 +38,7 @@ import (
|
||||||
"gvisor.googlesource.com/gvisor/pkg/log"
|
"gvisor.googlesource.com/gvisor/pkg/log"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
|
||||||
|
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/syserror"
|
"gvisor.googlesource.com/gvisor/pkg/syserror"
|
||||||
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
||||||
|
@ -81,6 +82,22 @@ func (f *fdDispenser) empty() bool {
|
||||||
return len(f.fds) == 0
|
return len(f.fds) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func adjustDirentCache(k *kernel.Kernel) error {
|
||||||
|
var hl syscall.Rlimit
|
||||||
|
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &hl); err != nil {
|
||||||
|
return fmt.Errorf("getting RLIMIT_NOFILE: %v", err)
|
||||||
|
}
|
||||||
|
if int64(hl.Cur) != syscall.RLIM_INFINITY {
|
||||||
|
newSize := hl.Cur / 2
|
||||||
|
if newSize < gofer.DefaultDirentCacheSize {
|
||||||
|
log.Infof("Setting gofer dirent cache size to %d", newSize)
|
||||||
|
gofer.DefaultDirentCacheSize = newSize
|
||||||
|
k.DirentCacheLimiter = fs.NewDirentCacheLimiter(newSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// setupRootContainerFS creates a mount namespace containing the root filesystem
|
// setupRootContainerFS creates a mount namespace containing the root filesystem
|
||||||
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
|
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
|
||||||
// 'setMountNS' is called after namespace is created. It must set the mount NS
|
// 'setMountNS' is called after namespace is created. It must set the mount NS
|
||||||
|
|
|
@ -16,8 +16,11 @@ package boot
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
"gvisor.googlesource.com/gvisor/pkg/log"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -41,10 +44,43 @@ var fromLinuxResource = map[string]limits.LimitType{
|
||||||
"RLIMIT_STACK": limits.Stack,
|
"RLIMIT_STACK": limits.Stack,
|
||||||
}
|
}
|
||||||
|
|
||||||
func createLimitSet(spec *specs.Spec) (*limits.LimitSet, error) {
|
func findName(lt limits.LimitType) string {
|
||||||
|
for k, v := range fromLinuxResource {
|
||||||
|
if v == lt {
|
||||||
|
return k
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "unknown"
|
||||||
|
}
|
||||||
|
|
||||||
|
var defaults defs
|
||||||
|
|
||||||
|
type defs struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
set *limits.LimitSet
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *defs) get() (*limits.LimitSet, error) {
|
||||||
|
d.mu.Lock()
|
||||||
|
defer d.mu.Unlock()
|
||||||
|
|
||||||
|
if d.err != nil {
|
||||||
|
return nil, d.err
|
||||||
|
}
|
||||||
|
if d.set == nil {
|
||||||
|
if err := d.initDefaults(); err != nil {
|
||||||
|
d.err = err
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return d.set, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *defs) initDefaults() error {
|
||||||
ls, err := limits.NewLinuxLimitSet()
|
ls, err := limits.NewLinuxLimitSet()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set default limits based on what containers get by default, ex:
|
// Set default limits based on what containers get by default, ex:
|
||||||
|
@ -66,6 +102,43 @@ func createLimitSet(spec *specs.Spec) (*limits.LimitSet, error) {
|
||||||
ls.SetUnchecked(limits.SignalsPending, limits.Limit{Cur: 0, Max: 0})
|
ls.SetUnchecked(limits.SignalsPending, limits.Limit{Cur: 0, Max: 0})
|
||||||
ls.SetUnchecked(limits.Stack, limits.Limit{Cur: 8388608, Max: limits.Infinity})
|
ls.SetUnchecked(limits.Stack, limits.Limit{Cur: 8388608, Max: limits.Infinity})
|
||||||
|
|
||||||
|
// Read host limits that directly affect the sandbox and adjust the defaults
|
||||||
|
// based on them.
|
||||||
|
for _, res := range []int{syscall.RLIMIT_FSIZE, syscall.RLIMIT_NOFILE} {
|
||||||
|
var hl syscall.Rlimit
|
||||||
|
if err := syscall.Getrlimit(res, &hl); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
lt, ok := limits.FromLinuxResource[res]
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("unknown rlimit type %v", res)
|
||||||
|
}
|
||||||
|
hostLimit := limits.Limit{
|
||||||
|
Cur: limits.FromLinux(hl.Cur),
|
||||||
|
Max: limits.FromLinux(hl.Max),
|
||||||
|
}
|
||||||
|
|
||||||
|
defaultLimit := ls.Get(lt)
|
||||||
|
if hostLimit.Cur != limits.Infinity && hostLimit.Cur < defaultLimit.Cur {
|
||||||
|
log.Warningf("Host limit is lower than recommended, resource: %q, host: %d, recommended: %d", findName(lt), hostLimit.Cur, defaultLimit.Cur)
|
||||||
|
}
|
||||||
|
if hostLimit.Cur != defaultLimit.Cur || hostLimit.Max != defaultLimit.Max {
|
||||||
|
log.Infof("Setting limit from host, resource: %q {soft: %d, hard: %d}", findName(lt), hostLimit.Cur, hostLimit.Max)
|
||||||
|
ls.SetUnchecked(lt, hostLimit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d.set = ls
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func createLimitSet(spec *specs.Spec) (*limits.LimitSet, error) {
|
||||||
|
ls, err := defaults.get()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
// Then apply overwrites on top of defaults.
|
// Then apply overwrites on top of defaults.
|
||||||
for _, rl := range spec.Process.Rlimits {
|
for _, rl := range spec.Process.Rlimits {
|
||||||
lt, ok := fromLinuxResource[rl.Type]
|
lt, ok := fromLinuxResource[rl.Type]
|
||||||
|
|
|
@ -274,6 +274,10 @@ func New(args Args) (*Loader, error) {
|
||||||
return nil, fmt.Errorf("initializing kernel: %v", err)
|
return nil, fmt.Errorf("initializing kernel: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := adjustDirentCache(k); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
// Turn on packet logging if enabled.
|
// Turn on packet logging if enabled.
|
||||||
if args.Conf.LogPackets {
|
if args.Conf.LogPackets {
|
||||||
log.Infof("Packet logging enabled")
|
log.Infof("Packet logging enabled")
|
||||||
|
|
|
@ -255,7 +255,16 @@ TEST_F(PollTest, Nfds) {
|
||||||
// Stash value of RLIMIT_NOFILES.
|
// Stash value of RLIMIT_NOFILES.
|
||||||
struct rlimit rlim;
|
struct rlimit rlim;
|
||||||
TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
|
TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
|
||||||
|
|
||||||
|
// gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE.
|
||||||
|
constexpr rlim_t gVisorMax = 1048576;
|
||||||
|
if (rlim.rlim_cur > gVisorMax) {
|
||||||
|
rlim.rlim_cur = gVisorMax;
|
||||||
|
TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
rlim_t max_fds = rlim.rlim_cur;
|
rlim_t max_fds = rlim.rlim_cur;
|
||||||
|
LOG(INFO) << "Using limit: " << max_fds;
|
||||||
|
|
||||||
// Create an eventfd. Since its value is initially zero, it is writable.
|
// Create an eventfd. Since its value is initially zero, it is writable.
|
||||||
FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
|
FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
|
||||||
|
|
Loading…
Reference in New Issue