1
0
mirror of https://github.com/henrydcase/nobs.git synced 2024-11-25 16:41:32 +00:00
nobs/hash/sha3/xor_unaligned.go

69 lines
1.3 KiB
Go
Raw Permalink Normal View History

2018-06-01 00:02:53 +01:00
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64 386 ppc64le
// +build !appengine
package sha3
sha3: optimizations and cleanup (#41) * complate reset of the SHA-3 code. Affects mostly the code in sha3.go * fixes a bug which causes SHAKE implementation to crash * implementation of Read()/Write() avoid unnecessary buffering as much as possible * NOTE: at some point I've done separated implementation for SumXXX, functions, but after optimizing implementation of Read/Write/Sum, the gain wasn't that big Current speed on Initial speed on i7-8665U@1.90 BenchmarkPermutationFunction 1592787 736 ns/op 271.90 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/224 98752 11630 ns/op 176.02 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/256 92508 12447 ns/op 164.46 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/384 76765 15206 ns/op 134.62 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/512 54333 21932 ns/op 93.33 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/224 10000 102161 ns/op 160.37 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/256 10000 106531 ns/op 153.80 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/384 8641 137272 ns/op 119.35 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/512 6340 189124 ns/op 86.63 MB/s 0 B/op 0 allocs/op BenchmarkShake_x01/SHAKE-128 167062 7149 ns/op 188.83 MB/s 0 B/op 0 allocs/op BenchmarkShake_x01/SHAKE-256 151982 7748 ns/op 174.24 MB/s 0 B/op 0 allocs/op BenchmarkShake_x16/SHAKE-128 12963 87770 ns/op 186.67 MB/s 0 B/op 0 allocs/op BenchmarkShake_x16/SHAKE-256 10000 105554 ns/op 155.22 MB/s 0 B/op 0 allocs/op BenchmarkCShake/cSHAKE-128 109148 10940 ns/op 187.11 MB/s 0 B/op 0 allocs/op BenchmarkCShake/cSHAKE-256 90324 13211 ns/op 154.94 MB/s 0 B/op 0 allocs/op PASS
2020-08-29 02:12:49 +01:00
import (
"unsafe"
)
2018-06-01 00:02:53 +01:00
2020-08-25 12:32:22 +01:00
// A storageBuf is an aligned array of maxRate bytes.
type storageBuf [maxRate / 8]uint64
func (b *storageBuf) asBytes() *[maxRate]byte {
return (*[maxRate]byte)(unsafe.Pointer(b))
}
2018-06-01 00:02:53 +01:00
func xorInUnaligned(d *state, buf []byte) {
n := len(buf)
2020-08-25 12:32:22 +01:00
bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0]))[: n/8 : n/8]
2018-06-01 00:02:53 +01:00
if n >= 72 {
d.a[0] ^= bw[0]
d.a[1] ^= bw[1]
d.a[2] ^= bw[2]
d.a[3] ^= bw[3]
d.a[4] ^= bw[4]
d.a[5] ^= bw[5]
d.a[6] ^= bw[6]
d.a[7] ^= bw[7]
d.a[8] ^= bw[8]
}
if n >= 104 {
d.a[9] ^= bw[9]
d.a[10] ^= bw[10]
d.a[11] ^= bw[11]
d.a[12] ^= bw[12]
}
if n >= 136 {
d.a[13] ^= bw[13]
d.a[14] ^= bw[14]
d.a[15] ^= bw[15]
d.a[16] ^= bw[16]
}
if n >= 144 {
d.a[17] ^= bw[17]
}
if n >= 168 {
d.a[18] ^= bw[18]
d.a[19] ^= bw[19]
d.a[20] ^= bw[20]
}
}
func copyOutUnaligned(d *state, buf []byte) {
ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0]))
copy(buf, ab[:])
}
sha3: optimizations and cleanup (#41) * complate reset of the SHA-3 code. Affects mostly the code in sha3.go * fixes a bug which causes SHAKE implementation to crash * implementation of Read()/Write() avoid unnecessary buffering as much as possible * NOTE: at some point I've done separated implementation for SumXXX, functions, but after optimizing implementation of Read/Write/Sum, the gain wasn't that big Current speed on Initial speed on i7-8665U@1.90 BenchmarkPermutationFunction 1592787 736 ns/op 271.90 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/224 98752 11630 ns/op 176.02 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/256 92508 12447 ns/op 164.46 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/384 76765 15206 ns/op 134.62 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x01/SHA-3/512 54333 21932 ns/op 93.33 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/224 10000 102161 ns/op 160.37 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/256 10000 106531 ns/op 153.80 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/384 8641 137272 ns/op 119.35 MB/s 0 B/op 0 allocs/op BenchmarkSha3Chunk_x16/SHA-3/512 6340 189124 ns/op 86.63 MB/s 0 B/op 0 allocs/op BenchmarkShake_x01/SHAKE-128 167062 7149 ns/op 188.83 MB/s 0 B/op 0 allocs/op BenchmarkShake_x01/SHAKE-256 151982 7748 ns/op 174.24 MB/s 0 B/op 0 allocs/op BenchmarkShake_x16/SHAKE-128 12963 87770 ns/op 186.67 MB/s 0 B/op 0 allocs/op BenchmarkShake_x16/SHAKE-256 10000 105554 ns/op 155.22 MB/s 0 B/op 0 allocs/op BenchmarkCShake/cSHAKE-128 109148 10940 ns/op 187.11 MB/s 0 B/op 0 allocs/op BenchmarkCShake/cSHAKE-256 90324 13211 ns/op 154.94 MB/s 0 B/op 0 allocs/op PASS
2020-08-29 02:12:49 +01:00
// TODO: remove this assignment
2018-06-01 00:02:53 +01:00
var (
xorIn = xorInUnaligned
copyOut = copyOutUnaligned
)
const xorImplementationUnaligned = "unaligned"