test/runtime: add the timeout option for proctor

proctor runs inside a sandbox and executes the tests. If proctor handles
timeouts itself, we know that the test sandbox is still alive, and proctor
can send SIGTERM to the test processes and collect all logs.

In addition, this change contains a few other things:
* upload runsc logs with other test artifacts.
* increase log level for java tests.
* call Fatalf instead of Errorf when we want to terminate a test.

PiperOrigin-RevId: 437385756
This commit is contained in:
Andrei Vagin 2022-03-25 19:36:09 -07:00 committed by gVisor bot
parent 28cf71cc61
commit b8fa96e201
8 changed files with 80 additions and 23 deletions

View File

@ -51,6 +51,11 @@ rm -rf "${profile_output}"
# Clean the bazel cache if there was a failure.
if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
set -x
if [ -d "/tmp/${BUILDKITE_JOB_ID}/" ]; then
tar -czf "/tmp/${BUILDKITE_JOB_ID}.tar.gz" -C /tmp/ "${BUILDKITE_JOB_ID}"
buildkite-agent artifact upload "/tmp/${BUILDKITE_JOB_ID}.tar.gz"
fi
# Attempt to clear the cache and shut down.
make clean || echo "make clean failed with code $?"
make bazel-shutdown || echo "make bazel-shutdown failed with code $?"
@ -65,4 +70,4 @@ for container in $(docker ps -q); do
if test -n "${maybe_kill}"; then
docker container kill "${container}"
fi
done
done

View File

@ -205,49 +205,49 @@ steps:
# Runtime tests.
- <<: *common
label: ":php: PHP runtime tests"
command: make php7.3.6-runtime-tests
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests
parallelism: 10
- <<: *common
label: ":java: Java runtime tests"
command: make java11-runtime-tests
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests
parallelism: 40
- <<: *common
label: ":golang: Go runtime tests"
command: make go1.12-runtime-tests
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests
parallelism: 10
- <<: *common
label: ":node: NodeJS runtime tests"
command: make nodejs12.4.0-runtime-tests
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests
parallelism: 10
- <<: *common
label: ":python: Python runtime tests"
command: make python3.7.3-runtime-tests
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests
parallelism: 10
# Runtime tests (LISAFS).
- <<: *common
label: ":php: PHP runtime tests (LISAFS)"
command: make php7.3.6-runtime-tests_lisafs
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests_lisafs
parallelism: 10
if: build.message =~ /lisafs/ || build.branch == "master"
- <<: *common
label: ":java: Java runtime tests (LISAFS)"
command: make java11-runtime-tests_lisafs
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests_lisafs
parallelism: 40
if: build.message =~ /lisafs/ || build.branch == "master"
- <<: *common
label: ":golang: Go runtime tests (LISAFS)"
command: make go1.12-runtime-tests_lisafs
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests_lisafs
parallelism: 10
if: build.message =~ /lisafs/ || build.branch == "master"
- <<: *common
label: ":node: NodeJS runtime tests (LISAFS)"
command: make nodejs12.4.0-runtime-tests_lisafs
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests_lisafs
parallelism: 10
if: build.message =~ /lisafs/ || build.branch == "master"
- <<: *common
label: ":python: Python runtime tests (LISAFS)"
command: make python3.7.3-runtime-tests_lisafs
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests_lisafs
parallelism: 10
if: build.message =~ /lisafs/ || build.branch == "master"

View File

@ -237,8 +237,8 @@ packetimpact-tests:
@$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*)
%-runtime-tests_lisafs: load-runtimes_% $(RUNTIME_BIN)
@$(call install_runtime,$(RUNTIME), --lisafs)
@$(call test_runtime,$(RUNTIME),--test_timeout=10800 //test/runtimes:$*)
@$(call install_runtime,$(RUNTIME), --lisafs --watchdog-action=panic)
@$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*)
do-tests: $(RUNTIME_BIN)
@$(RUNTIME_BIN) --rootless do true

View File

@ -48,15 +48,27 @@ func (c *Container) Exec(ctx context.Context, opts ExecOpts, args ...string) (st
if err != nil {
return "", err
}
done := make(chan struct{})
var (
out string
outErr error
)
// Read logs from another goroutine so that the process doesn't block on
// writing to its standard file descriptors.
go func() {
out, outErr = p.Logs()
close(done)
}()
if exitStatus, err := p.WaitExitStatus(ctx); err != nil {
return "", err
} else if exitStatus != 0 {
out, _ := p.Logs()
<-done
return out, fmt.Errorf("process terminated with status: %d", exitStatus)
}
return p.Logs()
<-done
return out, outErr
}
// ExecProcess creates a process inside the container and returns a process struct

View File

@ -66,9 +66,9 @@ func (javaRunner) TestCmds(tests []string) []*exec.Cmd {
[]string{
"-agentvm", // Execute each action using a pool of reusable JVMs.
"-dir:" + javaTestDir, // Base directory for test files and directories.
"-noreport", // Do not generate a final report.
"-timeoutFactor:20", // Extend the default timeout (2 min) of all tests by this factor.
"-verbose:nopass", // Verbose output but supress it for tests that passed.
"-timeoutFactor:5", // Extend the default timeout (2 min) of all tests by this factor.
"-verbose:all", // Verbose output.
"-tl:200", // Do not run tests which specify a timeout longer than 200s.
},
tests...,
)

View File

@ -22,6 +22,7 @@ import (
"log"
"os"
"strings"
"time"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/test/runtimes/proctor/lib"
@ -32,6 +33,7 @@ var (
list = flag.Bool("list", false, "list all available tests")
testNames = flag.String("tests", "", "run a subset of the available tests")
pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children")
timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout")
)
// setNumFilesLimit changes the NOFILE soft rlimit if it is too high.
@ -69,6 +71,8 @@ func main() {
log.Fatalf("runtime flag must be provided")
}
timer := time.NewTimer(*timeout)
tr, err := lib.TestRunnerForRuntime(*runtime)
if err != nil {
log.Fatalf("%v", err)
@ -86,6 +90,14 @@ func main() {
return
}
// heartbeat
go func() {
for {
time.Sleep(15 * time.Second)
log.Println("Proctor checking in " + time.Now().String())
}
}()
var tests []string
if *testNames == "" {
// Run every test.
@ -104,6 +116,33 @@ func main() {
// Run tests.
cmds := tr.TestCmds(tests)
done := make(chan struct{})
defer close(done)
go func() {
select {
case <-done:
return
case <-timer.C:
log.Println("The timeout duration is exceeded")
killed := false
for _, cmd := range cmds {
p := cmd.Process
if p == nil || cmd.ProcessState != nil {
continue
}
pid := p.Pid
if pid > 0 {
unix.Kill(pid, unix.SIGTERM)
killed = true
}
}
if killed {
// Give the tests time to handle the signal.
time.Sleep(5 * time.Second)
}
panic("FAIL: The timeout duration is exceeded")
}
}()
for _, cmd := range cmds {
cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
if err := cmd.Run(); err != nil {

View File

@ -131,7 +131,7 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string,
}
go func() {
output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ","))
output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ","), fmt.Sprintf("--timeout=%s", timeout))
close(done)
}()
@ -141,9 +141,10 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string,
fmt.Printf("PASS: (%v) %d tests passed\n", time.Since(now), len(tcs))
return
}
t.Errorf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
case <-time.After(timeout):
t.Errorf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
t.Fatalf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
// Add one minute to let proctor handle timeout.
case <-time.After(timeout + time.Minute):
t.Fatalf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
}
},
})

View File

@ -29,7 +29,7 @@ var (
image = flag.String("image", "", "docker image with runtime tests")
excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment")
batchSize = flag.Int("batch", 50, "number of test cases run in one command")
timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout")
timeout = flag.Duration("timeout", 20*time.Minute, "batch timeout")
)
func main() {