2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package timerfd implements the semantics of Linux timerfd objects as
|
|
|
|
// described by timerfd_create(2).
|
|
|
|
package timerfd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync/atomic"
|
|
|
|
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
|
|
|
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/usermem"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
|
|
"gvisor.dev/gvisor/pkg/waiter"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// TimerOperations implements fs.FileOperations for timerfds.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type TimerOperations struct {
|
2019-04-11 07:41:42 +00:00
|
|
|
fsutil.FileZeroSeek `state:"nosave"`
|
|
|
|
fsutil.FileNotDirReaddir `state:"nosave"`
|
|
|
|
fsutil.FileNoFsync `state:"nosave"`
|
|
|
|
fsutil.FileNoIoctl `state:"nosave"`
|
2019-05-21 22:17:05 +00:00
|
|
|
fsutil.FileNoMMap `state:"nosave"`
|
|
|
|
fsutil.FileNoSplice `state:"nosave"`
|
|
|
|
fsutil.FileNoopFlush `state:"nosave"`
|
2019-04-11 07:41:42 +00:00
|
|
|
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2018-08-02 17:41:44 +00:00
|
|
|
events waiter.Queue `state:"zerovalue"`
|
2018-04-27 17:37:02 +00:00
|
|
|
timer *ktime.Timer
|
|
|
|
|
|
|
|
// val is the number of timer expirations since the last successful call to
|
|
|
|
// Readv, Preadv, or SetTime. val is accessed using atomic memory
|
|
|
|
// operations.
|
|
|
|
val uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewFile returns a timerfd File that receives time from c.
|
|
|
|
func NewFile(ctx context.Context, c ktime.Clock) *fs.File {
|
2019-06-14 01:39:43 +00:00
|
|
|
dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[timerfd]")
|
Drop one dirent reference after referenced by file
When pipe is created, a dirent of pipe will be
created and its initial reference is set as 0.
Cause all dirent will only be destroyed when
the reference decreased to -1, so there is already
a 'initial reference' of dirent after it created.
For destroying dirent after all reference released,
the correct way is to drop the 'initial reference'
once someone hold a reference to the dirent, such
as fs.NewFile, otherwise the reference of dirent
will stay 0 all the time, and will cause memory
leak of dirent.
Except pipe, timerfd/eventfd/epoll has the same
problem
Here is a simple case to create memory leak of dirent
for pipe/timerfd/eventfd/epoll in C langange, after
run the case, pprof the runsc process, you will
find lots dirents of pipe/timerfd/eventfd/epoll not
freed:
int main(int argc, char *argv[])
{
int i;
int n;
int pipefd[2];
if (argc != 3) {
printf("Usage: %s epoll|timerfd|eventfd|pipe <iterations>\n", argv[0]);
}
n = strtol(argv[2], NULL, 10);
if (strcmp(argv[1], "epoll") == 0) {
for (i = 0; i < n; ++i)
close(epoll_create(1));
} else if (strcmp(argv[1], "timerfd") == 0) {
for (i = 0; i < n; ++i)
close(timerfd_create(CLOCK_REALTIME, 0));
} else if (strcmp(argv[1], "eventfd") == 0) {
for (i = 0; i < n; ++i)
close(eventfd(0, 0));
} else if (strcmp(argv[1], "pipe") == 0) {
for (i = 0; i < n; ++i)
if (pipe(pipefd) == 0) {
close(pipefd[0]);
close(pipefd[1]);
}
}
printf("%s %s test finished\r\n",argv[1],argv[2]);
return 0;
}
Change-Id: Ia1b8a1fb9142edb00c040e44ec644d007f81f5d2
PiperOrigin-RevId: 251531096
2019-06-04 22:39:24 +00:00
|
|
|
// Release the initial dirent reference after NewFile takes a reference.
|
|
|
|
defer dirent.DecRef()
|
2018-04-27 17:37:02 +00:00
|
|
|
tops := &TimerOperations{}
|
|
|
|
tops.timer = ktime.NewTimer(c, tops)
|
|
|
|
// Timerfds reject writes, but the Write flag must be set in order to
|
|
|
|
// ensure that our Writev/Pwritev methods actually get called to return
|
|
|
|
// the correct errors.
|
|
|
|
return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, tops)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release implements fs.FileOperations.Release.
|
|
|
|
func (t *TimerOperations) Release() {
|
|
|
|
t.timer.Destroy()
|
|
|
|
}
|
|
|
|
|
|
|
|
// PauseTimer pauses the associated Timer.
|
|
|
|
func (t *TimerOperations) PauseTimer() {
|
|
|
|
t.timer.Pause()
|
|
|
|
}
|
|
|
|
|
|
|
|
// ResumeTimer resumes the associated Timer.
|
|
|
|
func (t *TimerOperations) ResumeTimer() {
|
|
|
|
t.timer.Resume()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Clock returns the associated Timer's Clock.
|
|
|
|
func (t *TimerOperations) Clock() ktime.Clock {
|
|
|
|
return t.timer.Clock()
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetTime returns the associated Timer's setting and the time at which it was
|
|
|
|
// observed.
|
|
|
|
func (t *TimerOperations) GetTime() (ktime.Time, ktime.Setting) {
|
|
|
|
return t.timer.Get()
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetTime atomically changes the associated Timer's setting, resets the number
|
|
|
|
// of expirations to 0, and returns the previous setting and the time at which
|
|
|
|
// it was observed.
|
|
|
|
func (t *TimerOperations) SetTime(s ktime.Setting) (ktime.Time, ktime.Setting) {
|
|
|
|
return t.timer.SwapAnd(s, func() { atomic.StoreUint64(&t.val, 0) })
|
|
|
|
}
|
|
|
|
|
|
|
|
// Readiness implements waiter.Waitable.Readiness.
|
|
|
|
func (t *TimerOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
|
|
|
|
var ready waiter.EventMask
|
|
|
|
if atomic.LoadUint64(&t.val) != 0 {
|
|
|
|
ready |= waiter.EventIn
|
|
|
|
}
|
|
|
|
return ready
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventRegister implements waiter.Waitable.EventRegister.
|
|
|
|
func (t *TimerOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
|
|
|
t.events.EventRegister(e, mask)
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventUnregister implements waiter.Waitable.EventUnregister.
|
|
|
|
func (t *TimerOperations) EventUnregister(e *waiter.Entry) {
|
|
|
|
t.events.EventUnregister(e)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read implements fs.FileOperations.Read.
|
|
|
|
func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
|
|
|
|
const sizeofUint64 = 8
|
|
|
|
if dst.NumBytes() < sizeofUint64 {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
if val := atomic.SwapUint64(&t.val, 0); val != 0 {
|
|
|
|
var buf [sizeofUint64]byte
|
|
|
|
usermem.ByteOrder.PutUint64(buf[:], val)
|
|
|
|
if _, err := dst.CopyOut(ctx, buf[:]); err != nil {
|
|
|
|
// Linux does not undo consuming the number of expirations even if
|
|
|
|
// writing to userspace fails.
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return sizeofUint64, nil
|
|
|
|
}
|
|
|
|
return 0, syserror.ErrWouldBlock
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write implements fs.FileOperations.Write.
|
|
|
|
func (t *TimerOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
|
|
|
|
return 0, syserror.EINVAL
|
|
|
|
}
|
|
|
|
|
|
|
|
// Notify implements ktime.TimerListener.Notify.
|
Disable cpuClockTicker when app is idle
Kernel.cpuClockTicker increments kernel.cpuClock, which tasks use as a clock to
track their CPU usage. This improves latency in the syscall path by avoid
expensive monotonic clock calls on every syscall entry/exit.
However, this timer fires every 10ms. Thus, when all tasks are idle (i.e.,
blocked or stopped), this forces a sentry wakeup every 10ms, when we may
otherwise be able to sleep until the next app-relevant event. These wakeups
cause the sentry to utilize approximately 2% CPU when the application is
otherwise idle.
Updates to clock are not strictly necessary when the app is idle, as there are
no readers of cpuClock. This commit reduces idle CPU by disabling the timer
when tasks are completely idle, and computing its effects at the next wakeup.
Rather than disabling the timer as soon as the app goes idle, we wait until the
next tick, which provides a window for short sleeps to sleep and wakeup without
doing the (relatively) expensive work of disabling and enabling the timer.
PiperOrigin-RevId: 272265822
2019-10-01 19:13:09 +00:00
|
|
|
func (t *TimerOperations) Notify(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
|
2018-04-27 17:37:02 +00:00
|
|
|
atomic.AddUint64(&t.val, exp)
|
|
|
|
t.events.Notify(waiter.EventIn)
|
Disable cpuClockTicker when app is idle
Kernel.cpuClockTicker increments kernel.cpuClock, which tasks use as a clock to
track their CPU usage. This improves latency in the syscall path by avoid
expensive monotonic clock calls on every syscall entry/exit.
However, this timer fires every 10ms. Thus, when all tasks are idle (i.e.,
blocked or stopped), this forces a sentry wakeup every 10ms, when we may
otherwise be able to sleep until the next app-relevant event. These wakeups
cause the sentry to utilize approximately 2% CPU when the application is
otherwise idle.
Updates to clock are not strictly necessary when the app is idle, as there are
no readers of cpuClock. This commit reduces idle CPU by disabling the timer
when tasks are completely idle, and computing its effects at the next wakeup.
Rather than disabling the timer as soon as the app goes idle, we wait until the
next tick, which provides a window for short sleeps to sleep and wakeup without
doing the (relatively) expensive work of disabling and enabling the timer.
PiperOrigin-RevId: 272265822
2019-10-01 19:13:09 +00:00
|
|
|
return ktime.Setting{}, false
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Destroy implements ktime.TimerListener.Destroy.
|
|
|
|
func (t *TimerOperations) Destroy() {}
|