// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package hostmm provides tools for interacting with the host Linux kernel's // virtual memory management subsystem. package hostmm import ( "fmt" "os" "path" "syscall" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/usermem" ) // NotifyCurrentMemcgPressureCallback requests that f is called whenever the // calling process' memory cgroup indicates memory pressure of the given level, // as specified by Linux's Documentation/cgroup-v1/memory.txt. // // If NotifyCurrentMemcgPressureCallback succeeds, it returns a function that // terminates the requested memory pressure notifications. This function may be // called at most once. func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error) { cgdir, err := currentCgroupDirectory("memory") if err != nil { return nil, err } pressurePath := path.Join(cgdir, "memory.pressure_level") pressureFile, err := os.Open(pressurePath) if err != nil { return nil, err } defer pressureFile.Close() eventControlPath := path.Join(cgdir, "cgroup.event_control") eventControlFile, err := os.OpenFile(eventControlPath, os.O_WRONLY, 0) if err != nil { return nil, err } defer eventControlFile.Close() eventFD, err := newEventFD() if err != nil { return nil, err } // Don't use fmt.Fprintf since the whole string needs to be written in a // single syscall. eventControlStr := fmt.Sprintf("%d %d %s", eventFD.FD(), pressureFile.Fd(), level) if n, err := eventControlFile.Write([]byte(eventControlStr)); n != len(eventControlStr) || err != nil { eventFD.Close() return nil, fmt.Errorf("error writing %q to %s: got (%d, %v), wanted (%d, nil)", eventControlStr, eventControlPath, n, err, len(eventControlStr)) } log.Debugf("Receiving memory pressure level notifications from %s at level %q", pressurePath, level) const sizeofUint64 = 8 // The most significant bit of the eventfd value is set by the stop // function, which is practically unambiguous since it's not plausible for // 2**63 pressure events to occur between eventfd reads. const stopVal = 1 << 63 stopCh := make(chan struct{}) go func() { // S/R-SAFE: f provides synchronization if necessary rw := fd.NewReadWriter(eventFD.FD()) var buf [sizeofUint64]byte for { n, err := rw.Read(buf[:]) if err != nil { if err == syscall.EINTR { continue } panic(fmt.Sprintf("failed to read from memory pressure level eventfd: %v", err)) } if n != sizeofUint64 { panic(fmt.Sprintf("short read from memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64)) } val := usermem.ByteOrder.Uint64(buf[:]) if val >= stopVal { // Assume this was due to the notifier's "destructor" (the // function returned by NotifyCurrentMemcgPressureCallback // below) being called. eventFD.Close() close(stopCh) return } f() } }() return func() { rw := fd.NewReadWriter(eventFD.FD()) var buf [sizeofUint64]byte usermem.ByteOrder.PutUint64(buf[:], stopVal) for { n, err := rw.Write(buf[:]) if err != nil { if err == syscall.EINTR { continue } panic(fmt.Sprintf("failed to write to memory pressure level eventfd: %v", err)) } if n != sizeofUint64 { panic(fmt.Sprintf("short write to memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64)) } break } <-stopCh }, nil } func newEventFD() (*fd.FD, error) { f, _, e := syscall.Syscall(syscall.SYS_EVENTFD2, 0, 0, 0) if e != 0 { return nil, fmt.Errorf("failed to create eventfd: %v", e) } return fd.New(int(f)), nil }