diff --git a/.buildkite/hooks/post-command b/.buildkite/hooks/post-command index 6a84bbdb2..d41100610 100644 --- a/.buildkite/hooks/post-command +++ b/.buildkite/hooks/post-command @@ -51,6 +51,11 @@ rm -rf "${profile_output}" # Clean the bazel cache, if there's failure. if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then + set -x + if [ -d "/tmp/${BUILDKITE_JOB_ID}/" ]; then + tar -czf "/tmp/${BUILDKITE_JOB_ID}.tar.gz" -C /tmp/ "${BUILDKITE_JOB_ID}" + buildkite-agent artifact upload "/tmp/${BUILDKITE_JOB_ID}.tar.gz" + fi # Attempt to clear the cache and shut down. make clean || echo "make clean failed with code $?" make bazel-shutdown || echo "make bazel-shutdown failed with code $?" @@ -65,4 +70,4 @@ for container in $(docker ps -q); do if test -n "${maybe_kill}"; then docker container kill "${container}" fi -done \ No newline at end of file +done diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml index 58c3e306c..31d594405 100644 --- a/.buildkite/pipeline.yaml +++ b/.buildkite/pipeline.yaml @@ -205,49 +205,49 @@ steps: # Runtime tests. - <<: *common label: ":php: PHP runtime tests" - command: make php7.3.6-runtime-tests + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests parallelism: 10 - <<: *common label: ":java: Java runtime tests" - command: make java11-runtime-tests + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests parallelism: 40 - <<: *common label: ":golang: Go runtime tests" - command: make go1.12-runtime-tests + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests parallelism: 10 - <<: *common label: ":node: NodeJS runtime tests" - command: make nodejs12.4.0-runtime-tests + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests parallelism: 10 - <<: *common label: ":python: Python runtime tests" - command: make python3.7.3-runtime-tests + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests parallelism: 10 # Runtime tests (LISAFS). - <<: *common label: ":php: PHP runtime tests (LISAFS)" - command: make php7.3.6-runtime-tests_lisafs + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests_lisafs parallelism: 10 if: build.message =~ /lisafs/ || build.branch == "master" - <<: *common label: ":java: Java runtime tests (LISAFS)" - command: make java11-runtime-tests_lisafs + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests_lisafs parallelism: 40 if: build.message =~ /lisafs/ || build.branch == "master" - <<: *common label: ":golang: Go runtime tests (LISAFS)" - command: make go1.12-runtime-tests_lisafs + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests_lisafs parallelism: 10 if: build.message =~ /lisafs/ || build.branch == "master" - <<: *common label: ":node: NodeJS runtime tests (LISAFS)" - command: make nodejs12.4.0-runtime-tests_lisafs + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests_lisafs parallelism: 10 if: build.message =~ /lisafs/ || build.branch == "master" - <<: *common label: ":python: Python runtime tests (LISAFS)" - command: make python3.7.3-runtime-tests_lisafs + command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests_lisafs parallelism: 10 if: build.message =~ /lisafs/ || build.branch == "master" diff --git a/Makefile b/Makefile index 50e5e7da0..cda276b4f 100644 --- a/Makefile +++ b/Makefile @@ -237,8 +237,8 @@ packetimpact-tests: @$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*) %-runtime-tests_lisafs: load-runtimes_% $(RUNTIME_BIN) - @$(call install_runtime,$(RUNTIME), --lisafs) - @$(call test_runtime,$(RUNTIME),--test_timeout=10800 //test/runtimes:$*) + @$(call install_runtime,$(RUNTIME), --lisafs --watchdog-action=panic) + @$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*) do-tests: $(RUNTIME_BIN) @$(RUNTIME_BIN) --rootless do true diff --git a/pkg/test/dockerutil/exec.go b/pkg/test/dockerutil/exec.go index bf968acec..63449860d 100644 --- a/pkg/test/dockerutil/exec.go +++ b/pkg/test/dockerutil/exec.go @@ -48,15 +48,27 @@ func (c *Container) Exec(ctx context.Context, opts ExecOpts, args ...string) (st if err != nil { return "", err } + done := make(chan struct{}) + var ( + out string + outErr error + ) + // Read logs from another go-routine to be sure that it doesn't block on + // writing into standard file descriptors. + go func() { + out, outErr = p.Logs() + close(done) + }() if exitStatus, err := p.WaitExitStatus(ctx); err != nil { return "", err } else if exitStatus != 0 { - out, _ := p.Logs() + <-done return out, fmt.Errorf("process terminated with status: %d", exitStatus) } - return p.Logs() + <-done + return out, outErr } // ExecProcess creates a process inside the container and returns a process struct diff --git a/test/runtimes/proctor/lib/java.go b/test/runtimes/proctor/lib/java.go index 3105011ff..69773e3e5 100644 --- a/test/runtimes/proctor/lib/java.go +++ b/test/runtimes/proctor/lib/java.go @@ -66,9 +66,9 @@ func (javaRunner) TestCmds(tests []string) []*exec.Cmd { []string{ "-agentvm", // Execute each action using a pool of reusable JVMs. "-dir:" + javaTestDir, // Base directory for test files and directories. - "-noreport", // Do not generate a final report. - "-timeoutFactor:20", // Extend the default timeout (2 min) of all tests by this factor. - "-verbose:nopass", // Verbose output but supress it for tests that passed. + "-timeoutFactor:5", // Extend the default timeout (2 min) of all tests by this factor. + "-verbose:all", // Verbose output. + "-tl:200", // Do not run tests which specify a timeout longer than 200s. }, tests..., ) diff --git a/test/runtimes/proctor/main.go b/test/runtimes/proctor/main.go index 8c076a499..ea0b0ed4e 100644 --- a/test/runtimes/proctor/main.go +++ b/test/runtimes/proctor/main.go @@ -22,6 +22,7 @@ import ( "log" "os" "strings" + "time" "golang.org/x/sys/unix" "gvisor.dev/gvisor/test/runtimes/proctor/lib" @@ -32,6 +33,7 @@ var ( list = flag.Bool("list", false, "list all available tests") testNames = flag.String("tests", "", "run a subset of the available tests") pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") + timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout") ) // setNumFilesLimit changes the NOFILE soft rlimit if it is too high. @@ -69,6 +71,8 @@ func main() { log.Fatalf("runtime flag must be provided") } + timer := time.NewTimer(*timeout) + tr, err := lib.TestRunnerForRuntime(*runtime) if err != nil { log.Fatalf("%v", err) @@ -86,6 +90,14 @@ func main() { return } + // heartbeat + go func() { + for { + time.Sleep(15 * time.Second) + log.Println("Proctor checking in " + time.Now().String()) + } + }() + var tests []string if *testNames == "" { // Run every test. @@ -104,6 +116,33 @@ func main() { // Run tests. cmds := tr.TestCmds(tests) + done := make(chan struct{}) + defer close(done) + go func() { + select { + case <-done: + return + case <-timer.C: + log.Println("The timeout duration is exceeded") + killed := false + for _, cmd := range cmds { + p := cmd.Process + if p == nil || cmd.ProcessState != nil { + continue + } + pid := p.Pid + if pid > 0 { + unix.Kill(pid, unix.SIGTERM) + killed = true + } + } + if killed { + // Let tests to handle signals + time.Sleep(5 * time.Second) + } + panic("FAIL: The timeout duration is exceeded") + } + }() for _, cmd := range cmds { cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr if err := cmd.Run(); err != nil { diff --git a/test/runtimes/runner/lib/lib.go b/test/runtimes/runner/lib/lib.go index 6b95b4cfa..c1d5e8778 100644 --- a/test/runtimes/runner/lib/lib.go +++ b/test/runtimes/runner/lib/lib.go @@ -131,7 +131,7 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string, } go func() { - output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ",")) + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ","), fmt.Sprintf("--timeout=%s", timeout)) close(done) }() @@ -141,9 +141,10 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string, fmt.Printf("PASS: (%v) %d tests passed\n", time.Since(now), len(tcs)) return } - t.Errorf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output) - case <-time.After(timeout): - t.Errorf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output) + t.Fatalf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output) + // Add one minute to let proctor handle timeout. + case <-time.After(timeout + time.Minute): + t.Fatalf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output) } }, }) diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index ec79a22c2..6a2eb2859 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -29,7 +29,7 @@ var ( image = flag.String("image", "", "docker image with runtime tests") excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") batchSize = flag.Int("batch", 50, "number of test cases run in one command") - timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout") + timeout = flag.Duration("timeout", 20*time.Minute, "batch timeout") ) func main() {