131 lines
4.1 KiB
Go
131 lines
4.1 KiB
Go
// Copyright 2019 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package hostmm provides tools for interacting with the host Linux kernel's
|
|
// virtual memory management subsystem.
|
|
package hostmm
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"syscall"
|
|
|
|
"gvisor.dev/gvisor/pkg/fd"
|
|
"gvisor.dev/gvisor/pkg/log"
|
|
"gvisor.dev/gvisor/pkg/sentry/usermem"
|
|
)
|
|
|
|
// NotifyCurrentMemcgPressureCallback requests that f is called whenever the
|
|
// calling process' memory cgroup indicates memory pressure of the given level,
|
|
// as specified by Linux's Documentation/cgroup-v1/memory.txt.
|
|
//
|
|
// If NotifyCurrentMemcgPressureCallback succeeds, it returns a function that
|
|
// terminates the requested memory pressure notifications. This function may be
|
|
// called at most once.
|
|
func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error) {
|
|
cgdir, err := currentCgroupDirectory("memory")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pressurePath := path.Join(cgdir, "memory.pressure_level")
|
|
pressureFile, err := os.Open(pressurePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer pressureFile.Close()
|
|
|
|
eventControlPath := path.Join(cgdir, "cgroup.event_control")
|
|
eventControlFile, err := os.OpenFile(eventControlPath, os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer eventControlFile.Close()
|
|
|
|
eventFD, err := newEventFD()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Don't use fmt.Fprintf since the whole string needs to be written in a
|
|
// single syscall.
|
|
eventControlStr := fmt.Sprintf("%d %d %s", eventFD.FD(), pressureFile.Fd(), level)
|
|
if n, err := eventControlFile.Write([]byte(eventControlStr)); n != len(eventControlStr) || err != nil {
|
|
eventFD.Close()
|
|
return nil, fmt.Errorf("error writing %q to %s: got (%d, %v), wanted (%d, nil)", eventControlStr, eventControlPath, n, err, len(eventControlStr))
|
|
}
|
|
|
|
log.Debugf("Receiving memory pressure level notifications from %s at level %q", pressurePath, level)
|
|
const sizeofUint64 = 8
|
|
// The most significant bit of the eventfd value is set by the stop
|
|
// function, which is practically unambiguous since it's not plausible for
|
|
// 2**63 pressure events to occur between eventfd reads.
|
|
const stopVal = 1 << 63
|
|
stopCh := make(chan struct{})
|
|
go func() { // S/R-SAFE: f provides synchronization if necessary
|
|
rw := fd.NewReadWriter(eventFD.FD())
|
|
var buf [sizeofUint64]byte
|
|
for {
|
|
n, err := rw.Read(buf[:])
|
|
if err != nil {
|
|
if err == syscall.EINTR {
|
|
continue
|
|
}
|
|
panic(fmt.Sprintf("failed to read from memory pressure level eventfd: %v", err))
|
|
}
|
|
if n != sizeofUint64 {
|
|
panic(fmt.Sprintf("short read from memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64))
|
|
}
|
|
val := usermem.ByteOrder.Uint64(buf[:])
|
|
if val >= stopVal {
|
|
// Assume this was due to the notifier's "destructor" (the
|
|
// function returned by NotifyCurrentMemcgPressureCallback
|
|
// below) being called.
|
|
eventFD.Close()
|
|
close(stopCh)
|
|
return
|
|
}
|
|
f()
|
|
}
|
|
}()
|
|
return func() {
|
|
rw := fd.NewReadWriter(eventFD.FD())
|
|
var buf [sizeofUint64]byte
|
|
usermem.ByteOrder.PutUint64(buf[:], stopVal)
|
|
for {
|
|
n, err := rw.Write(buf[:])
|
|
if err != nil {
|
|
if err == syscall.EINTR {
|
|
continue
|
|
}
|
|
panic(fmt.Sprintf("failed to write to memory pressure level eventfd: %v", err))
|
|
}
|
|
if n != sizeofUint64 {
|
|
panic(fmt.Sprintf("short write to memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64))
|
|
}
|
|
break
|
|
}
|
|
<-stopCh
|
|
}, nil
|
|
}
|
|
|
|
func newEventFD() (*fd.FD, error) {
|
|
f, _, e := syscall.Syscall(syscall.SYS_EVENTFD2, 0, 0, 0)
|
|
if e != 0 {
|
|
return nil, fmt.Errorf("failed to create eventfd: %v", e)
|
|
}
|
|
return fd.New(int(f)), nil
|
|
}
|