// Copyright 2019 The gVisor Authors.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package template doesn't exist. This file must be instantiated using the
// go_template_instance rule in tools/go_generics/defs.bzl.
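//
// As a sketch of what an instantiation might look like in a BUILD file (the
// name, label, and output below are illustrative, not taken from any
// particular BUILD file in this repository):
//
//	go_template_instance(
//	    name = "seqatomic_int",
//	    out = "seqatomic_int_unsafe.go",
//	    package = "seqatomic",
//	    suffix = "Int",
//	    template = "//pkg/sync/seqatomic:generic_seqatomic",
//	    types = {
//	        "Value": "int",
//	    },
//	)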
package template

import (
	"fmt"
	"reflect"
	"strings"
	"unsafe"

	"gvisor.dev/gvisor/pkg/sync"
)

// Value is a required type parameter.
//
// Value must not contain any pointers, including interface objects, function
// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs
// containing any of the above. An init() function will panic if this property
// does not hold.
type Value struct{}
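
// For illustration, hypothetical instantiations of Value (not part of this
// template) that would and would not satisfy the constraint above:
//
//	type timestamps struct{ sec, nsec int64 } // ok: contains no pointers
//	type named struct{ name string }          // panics in init(): string headers contain a pointer
//	type buffer struct{ data []byte }         // panics in init(): slice headers contain a pointer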

// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
// with any writer critical sections in seq.
//
//go:nosplit
func SeqAtomicLoad(seq *sync.SeqCount, ptr *Value) Value {
	for {
		if val, ok := SeqAtomicTryLoad(seq, seq.BeginRead(), ptr); ok {
			return val
		}
	}
}
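
// As a usage sketch (assuming an instantiation of this template for some
// pointer-free Value type, and assuming sync.SeqCount's BeginWrite/EndWrite
// writer API), a writer publishes updates while readers take consistent
// snapshots with SeqAtomicLoad:
//
//	var (
//		seq    sync.SeqCount
//		shared Value
//	)
//
//	// Writer (writer critical sections must be serialized externally,
//	// e.g. by a mutex):
//	seq.BeginWrite()
//	shared = newValue // newValue is a hypothetical replacement Value
//	seq.EndWrite()
//
//	// Reader: retries internally until it observes a copy that did not
//	// race with a writer.
//	snapshot := SeqAtomicLoad(&seq, &shared)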

// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section
// in seq initiated by a call to seq.BeginRead() that returned epoch. If the
// read would race with a writer critical section, SeqAtomicTryLoad returns
// (unspecified, false).
//
//go:nosplit
func SeqAtomicTryLoad(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (val Value, ok bool) {
	if sync.RaceEnabled {
		// runtime.RaceDisable() doesn't actually stop the race detector, so it
		// can't help us here. Instead, call runtime.memmove directly, which is
		// not instrumented by the race detector.
		sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
	} else {
		// This is ~40% faster for short reads than going through memmove.
		val = *ptr
	}
	ok = seq.ReadOk(epoch)
	return
}
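
// Taking epoch as an argument lets a caller snapshot several values
// consistently within a single reader critical section. A sketch (seq, val1,
// and val2 are hypothetical; both values must be protected by the same seq):
//
//	for {
//		epoch := seq.BeginRead()
//		v1, ok1 := SeqAtomicTryLoad(seq, epoch, &val1)
//		v2, ok2 := SeqAtomicTryLoad(seq, epoch, &val2)
//		if ok1 && ok2 {
//			use(v1, v2) // both copies are from the same epoch
//			break
//		}
//	}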

func init() {
	var val Value
	typ := reflect.TypeOf(val)
	name := typ.Name()
	if ptrs := sync.PointersInType(typ, name); len(ptrs) != 0 {
		panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n")))
	}
}