test/runtime: add the timeout option for proctor
proctor runs inside a sandbox and executes the tests. If proctor handles timeouts itself, we will know that the test sandbox is still alive, and proctor will send SIGTERM to the test processes and collect all logs. In addition, this change contains a few other things: * upload runsc logs with other test artifacts. * increase the log level for java tests. * call Fatalf instead of Errorf when we want to terminate a test. PiperOrigin-RevId: 437385756
This commit is contained in:
parent
28cf71cc61
commit
b8fa96e201
|
@ -51,6 +51,11 @@ rm -rf "${profile_output}"
|
|||
|
||||
# Clean the bazel cache, if there's failure.
|
||||
if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
|
||||
set -x
|
||||
if [ -d "/tmp/${BUILDKITE_JOB_ID}/" ]; then
|
||||
tar -czf "/tmp/${BUILDKITE_JOB_ID}.tar.gz" -C /tmp/ "${BUILDKITE_JOB_ID}"
|
||||
buildkite-agent artifact upload "/tmp/${BUILDKITE_JOB_ID}.tar.gz"
|
||||
fi
|
||||
# Attempt to clear the cache and shut down.
|
||||
make clean || echo "make clean failed with code $?"
|
||||
make bazel-shutdown || echo "make bazel-shutdown failed with code $?"
|
||||
|
@ -65,4 +70,4 @@ for container in $(docker ps -q); do
|
|||
if test -n "${maybe_kill}"; then
|
||||
docker container kill "${container}"
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
|
|
@ -205,49 +205,49 @@ steps:
|
|||
# Runtime tests.
|
||||
- <<: *common
|
||||
label: ":php: PHP runtime tests"
|
||||
command: make php7.3.6-runtime-tests
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests
|
||||
parallelism: 10
|
||||
- <<: *common
|
||||
label: ":java: Java runtime tests"
|
||||
command: make java11-runtime-tests
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests
|
||||
parallelism: 40
|
||||
- <<: *common
|
||||
label: ":golang: Go runtime tests"
|
||||
command: make go1.12-runtime-tests
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests
|
||||
parallelism: 10
|
||||
- <<: *common
|
||||
label: ":node: NodeJS runtime tests"
|
||||
command: make nodejs12.4.0-runtime-tests
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests
|
||||
parallelism: 10
|
||||
- <<: *common
|
||||
label: ":python: Python runtime tests"
|
||||
command: make python3.7.3-runtime-tests
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests
|
||||
parallelism: 10
|
||||
|
||||
# Runtime tests (LISAFS).
|
||||
- <<: *common
|
||||
label: ":php: PHP runtime tests (LISAFS)"
|
||||
command: make php7.3.6-runtime-tests_lisafs
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} php7.3.6-runtime-tests_lisafs
|
||||
parallelism: 10
|
||||
if: build.message =~ /lisafs/ || build.branch == "master"
|
||||
- <<: *common
|
||||
label: ":java: Java runtime tests (LISAFS)"
|
||||
command: make java11-runtime-tests_lisafs
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} java11-runtime-tests_lisafs
|
||||
parallelism: 40
|
||||
if: build.message =~ /lisafs/ || build.branch == "master"
|
||||
- <<: *common
|
||||
label: ":golang: Go runtime tests (LISAFS)"
|
||||
command: make go1.12-runtime-tests_lisafs
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} go1.12-runtime-tests_lisafs
|
||||
parallelism: 10
|
||||
if: build.message =~ /lisafs/ || build.branch == "master"
|
||||
- <<: *common
|
||||
label: ":node: NodeJS runtime tests (LISAFS)"
|
||||
command: make nodejs12.4.0-runtime-tests_lisafs
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} nodejs12.4.0-runtime-tests_lisafs
|
||||
parallelism: 10
|
||||
if: build.message =~ /lisafs/ || build.branch == "master"
|
||||
- <<: *common
|
||||
label: ":python: Python runtime tests (LISAFS)"
|
||||
command: make python3.7.3-runtime-tests_lisafs
|
||||
command: make RUNTIME_LOG_DIR=/tmp/$${BUILDKITE_JOB_ID} python3.7.3-runtime-tests_lisafs
|
||||
parallelism: 10
|
||||
if: build.message =~ /lisafs/ || build.branch == "master"
|
||||
|
||||
|
|
4
Makefile
4
Makefile
|
@ -237,8 +237,8 @@ packetimpact-tests:
|
|||
@$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*)
|
||||
|
||||
%-runtime-tests_lisafs: load-runtimes_% $(RUNTIME_BIN)
|
||||
@$(call install_runtime,$(RUNTIME), --lisafs)
|
||||
@$(call test_runtime,$(RUNTIME),--test_timeout=10800 //test/runtimes:$*)
|
||||
@$(call install_runtime,$(RUNTIME), --lisafs --watchdog-action=panic)
|
||||
@$(call test_runtime,$(RUNTIME),--test_timeout=1800 //test/runtimes:$*)
|
||||
|
||||
do-tests: $(RUNTIME_BIN)
|
||||
@$(RUNTIME_BIN) --rootless do true
|
||||
|
|
|
@ -48,15 +48,27 @@ func (c *Container) Exec(ctx context.Context, opts ExecOpts, args ...string) (st
|
|||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
done := make(chan struct{})
|
||||
var (
|
||||
out string
|
||||
outErr error
|
||||
)
|
||||
// Read logs from another go-routine to be sure that it doesn't block on
|
||||
// writing into standard file descriptors.
|
||||
go func() {
|
||||
out, outErr = p.Logs()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
if exitStatus, err := p.WaitExitStatus(ctx); err != nil {
|
||||
return "", err
|
||||
} else if exitStatus != 0 {
|
||||
out, _ := p.Logs()
|
||||
<-done
|
||||
return out, fmt.Errorf("process terminated with status: %d", exitStatus)
|
||||
}
|
||||
|
||||
return p.Logs()
|
||||
<-done
|
||||
return out, outErr
|
||||
}
|
||||
|
||||
// ExecProcess creates a process inside the container and returns a process struct
|
||||
|
|
|
@ -66,9 +66,9 @@ func (javaRunner) TestCmds(tests []string) []*exec.Cmd {
|
|||
[]string{
|
||||
"-agentvm", // Execute each action using a pool of reusable JVMs.
|
||||
"-dir:" + javaTestDir, // Base directory for test files and directories.
|
||||
"-noreport", // Do not generate a final report.
|
||||
"-timeoutFactor:20", // Extend the default timeout (2 min) of all tests by this factor.
|
||||
"-verbose:nopass", // Verbose output but supress it for tests that passed.
|
||||
"-timeoutFactor:5", // Extend the default timeout (2 min) of all tests by this factor.
|
||||
"-verbose:all", // Verbose output.
|
||||
"-tl:200", // Do not run tests which specify a timeout longer than 200s.
|
||||
},
|
||||
tests...,
|
||||
)
|
||||
|
|
|
@ -22,6 +22,7 @@ import (
|
|||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
"gvisor.dev/gvisor/test/runtimes/proctor/lib"
|
||||
|
@ -32,6 +33,7 @@ var (
|
|||
list = flag.Bool("list", false, "list all available tests")
|
||||
testNames = flag.String("tests", "", "run a subset of the available tests")
|
||||
pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children")
|
||||
timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout")
|
||||
)
|
||||
|
||||
// setNumFilesLimit changes the NOFILE soft rlimit if it is too high.
|
||||
|
@ -69,6 +71,8 @@ func main() {
|
|||
log.Fatalf("runtime flag must be provided")
|
||||
}
|
||||
|
||||
timer := time.NewTimer(*timeout)
|
||||
|
||||
tr, err := lib.TestRunnerForRuntime(*runtime)
|
||||
if err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
|
@ -86,6 +90,14 @@ func main() {
|
|||
return
|
||||
}
|
||||
|
||||
// heartbeat
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(15 * time.Second)
|
||||
log.Println("Proctor checking in " + time.Now().String())
|
||||
}
|
||||
}()
|
||||
|
||||
var tests []string
|
||||
if *testNames == "" {
|
||||
// Run every test.
|
||||
|
@ -104,6 +116,33 @@ func main() {
|
|||
|
||||
// Run tests.
|
||||
cmds := tr.TestCmds(tests)
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
go func() {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
case <-timer.C:
|
||||
log.Println("The timeout duration is exceeded")
|
||||
killed := false
|
||||
for _, cmd := range cmds {
|
||||
p := cmd.Process
|
||||
if p == nil || cmd.ProcessState != nil {
|
||||
continue
|
||||
}
|
||||
pid := p.Pid
|
||||
if pid > 0 {
|
||||
unix.Kill(pid, unix.SIGTERM)
|
||||
killed = true
|
||||
}
|
||||
}
|
||||
if killed {
|
||||
// Let tests to handle signals
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
panic("FAIL: The timeout duration is exceeded")
|
||||
}
|
||||
}()
|
||||
for _, cmd := range cmds {
|
||||
cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
|
|
|
@ -131,7 +131,7 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string,
|
|||
}
|
||||
|
||||
go func() {
|
||||
output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ","))
|
||||
output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", lang, "--tests", strings.Join(tcs, ","), fmt.Sprintf("--timeout=%s", timeout))
|
||||
close(done)
|
||||
}()
|
||||
|
||||
|
@ -141,9 +141,10 @@ func getTests(ctx context.Context, d *dockerutil.Container, lang, image string,
|
|||
fmt.Printf("PASS: (%v) %d tests passed\n", time.Since(now), len(tcs))
|
||||
return
|
||||
}
|
||||
t.Errorf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
|
||||
case <-time.After(timeout):
|
||||
t.Errorf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
|
||||
t.Fatalf("FAIL: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
|
||||
// Add one minute to let proctor handle timeout.
|
||||
case <-time.After(timeout + time.Minute):
|
||||
t.Fatalf("TIMEOUT: (%v):\nBatch:\n%s\nOutput:\n%s\n", time.Since(now), strings.Join(tcs, "\n"), output)
|
||||
}
|
||||
},
|
||||
})
|
||||
|
|
|
@ -29,7 +29,7 @@ var (
|
|||
image = flag.String("image", "", "docker image with runtime tests")
|
||||
excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment")
|
||||
batchSize = flag.Int("batch", 50, "number of test cases run in one command")
|
||||
timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout")
|
||||
timeout = flag.Duration("timeout", 20*time.Minute, "batch timeout")
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
|
Loading…
Reference in New Issue