// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ml

import (
	"context"
	"os"
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
	"gvisor.dev/gvisor/test/benchmarks/harness"
)

// BenchmarkTensorflow runs workloads from a TensorFlow tutorial.
// See: https://github.com/aymericdamien/TensorFlow-Examples
func BenchmarkTensorflow(b *testing.B) {
	workloads := map[string]string{
		"GradientDecisionTree": "2_BasicModels/gradient_boosted_decision_tree.py",
		"Kmeans":               "2_BasicModels/kmeans.py",
		"LogisticRegression":   "2_BasicModels/logistic_regression.py",
		"NearestNeighbor":      "2_BasicModels/nearest_neighbor.py",
		"RandomForest":         "2_BasicModels/random_forest.py",
		"ConvolutionalNetwork": "3_NeuralNetworks/convolutional_network.py",
		"MultilayerPerceptron": "3_NeuralNetworks/multilayer_perceptron.py",
		"NeuralNetwork":        "3_NeuralNetworks/neural_network.py",
	}
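
	// The machine provides the containers that the workloads run in; fail
	// the whole benchmark if the harness cannot supply one.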
	machine, err := harness.GetMachine()
	if err != nil {
		b.Fatalf("failed to get machine: %v", err)
	}
	defer machine.CleanUp()

	for name, workload := range workloads {
		b.Run(name, func(b *testing.B) {
			ctx := context.Background()

			b.ResetTimer()
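			// The timer is stopped immediately and re-armed per iteration
			// around the container run only, so container creation and
			// cache dropping stay out of the measurement.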
			b.StopTimer()

			for i := 0; i < b.N; i++ {
				container := machine.GetContainer(ctx, b)
				defer container.CleanUp(ctx)
				if err := harness.DropCaches(machine); err != nil {
					b.Skipf("failed to drop caches: %v. You probably need root.", err)
				}
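				// Note: the deferred CleanUp calls run when the b.Run
				// closure returns, not per loop iteration, so containers
				// accumulate until the sub-benchmark finishes.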

				// Run tensorflow. The workload path is resolved relative
				// to WorkDir inside the benchmarks/tensorflow image.
				b.StartTimer()
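				// container.Run blocks until the command exits; out holds
				// the container logs and is reported only on failure.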
				if out, err := container.Run(ctx, dockerutil.RunOpts{
					Image:   "benchmarks/tensorflow",
					Env:     []string{"PYTHONPATH=$PYTHONPATH:/TensorFlow-Examples/examples"},
					WorkDir: "/TensorFlow-Examples/examples",
				}, "python", workload); err != nil {
					b.Fatalf("failed to run container: %v logs: %s", err, out)
				}
				b.StopTimer()
			}
		})
	}
}
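
// A typical invocation selects a single workload via the standard go test
// benchmark flags; the harness and dockerutil packages register additional
// flags of their own (for example, which Docker runtime to benchmark), so
// this sketch omits them:
//
//	go test -bench=BenchmarkTensorflow/Kmeans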

func TestMain(m *testing.M) {
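	// Initialize the harness and pin a fixed benchtime: these workloads
	// cannot spread work across b.N, so a time-based -benchtime would
	// otherwise rerun the full training scripts indefinitely.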
	harness.Init()
	harness.SetFixedBenchmarks()
	os.Exit(m.Run())
}