diff --git a/hash/sha3/LICENSE b/hash/sha3/LICENSE deleted file mode 100644 index 00968cb..0000000 --- a/hash/sha3/LICENSE +++ /dev/null @@ -1,46 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -=== - -cSHAKE code is invention of Krzysztof Kwiatkowski and is licensed under -license below: - - DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE - Version 2, December 2004 - - Copyright (C) 2018 Krzysztof Kwiatkowski - - Everyone is permitted to copy and distribute verbatim or modified - copies of this license document, and changing it is allowed as long - as the name is changed. - - DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/hash/sha3/sha3_test.go b/hash/sha3/sha3_test.go index c79a0cd..7535e2c 100644 --- a/hash/sha3/sha3_test.go +++ b/hash/sha3/sha3_test.go @@ -314,7 +314,7 @@ func TestReset(t *testing.T) { for _, v := range testShakes { // Calculate hash for the first time - c := v.constructor(nil, []byte{0x99, 0x98}) + c := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) c.Write(sequentialBytes(0x100)) c.Read(out1) @@ -335,7 +335,7 @@ func TestClone(t *testing.T) { in := sequentialBytes(0x100) for _, v := range testShakes { - h1 := v.constructor(nil, []byte{0x01}) + h1 := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) h1.Write([]byte{0x01}) h2 := h1.Clone() @@ -472,3 +472,19 @@ func ExampleCShake256() { //a90a4c6ca9af2156eba43dc8398279e6b60dcd56fb21837afe6c308fd4ceb05b9dd98c6ee866ca7dc5a39d53e960f400bcd5a19c8a2d6ec6459f63696543a0d8 //85e73a72228d08b46515553ca3a29d47df3047e5d84b12d6c2c63e579f4fd1105716b7838e92e981863907f434bfd4443c9e56ea09da998d2f9b47db71988109 } + +func TestXXX(t *testing.T) { + var out1, out2 [32]byte + h := NewCShake256([]byte("CSHAKE256"), []byte("CustomStrign")) + + h.Write([]byte{0x1, 0x2, 0x3}) + h.Read(out1[:]) + + h.Reset() + + h.Write([]byte{0x1, 0x2, 0x3}) + h.Read(out2[:]) + + fmt.Println(out1) + fmt.Println(out2) +} diff --git a/hash/shake/hashes_generic.go b/hash/shake/hashes_generic.go new file mode 100644 index 0000000..c6da68a --- /dev/null +++ b/hash/shake/hashes_generic.go @@ -0,0 +1,27 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//+build gccgo appengine !s390x + +package shake + +import ( + "hash" +) + +// new224Asm returns an assembly implementation of SHA3-224 if available, +// otherwise it returns nil. +func new224Asm() hash.Hash { return nil } + +// new256Asm returns an assembly implementation of SHA3-256 if available, +// otherwise it returns nil. +func new256Asm() hash.Hash { return nil } + +// new384Asm returns an assembly implementation of SHA3-384 if available, +// otherwise it returns nil. +func new384Asm() hash.Hash { return nil } + +// new512Asm returns an assembly implementation of SHA3-512 if available, +// otherwise it returns nil. +func new512Asm() hash.Hash { return nil } diff --git a/hash/shake/keccakf.go b/hash/shake/keccakf.go new file mode 100644 index 0000000..9323123 --- /dev/null +++ b/hash/shake/keccakf.go @@ -0,0 +1,412 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package shake + +// rc stores the round constants for use in the ι step. +var rc = [24]uint64{ + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808A, + 0x8000000080008000, + 0x000000000000808B, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008A, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000A, + 0x000000008000808B, + 0x800000000000008B, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800A, + 0x800000008000000A, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008, +} + +// keccakF1600 applies the Keccak permutation to a 1600b-wide +// state represented as a slice of 25 uint64s. +func keccakF1600(a *[25]uint64) { + // Implementation translated from Keccak-inplace.c + // in the keccak reference code. + var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64 + + for i := 0; i < 24; i += 4 { + // Combines the 5 steps in each round into 2 steps. + // Unrolls 4 rounds per loop and spreads some steps across rounds. + + // Round 1 + bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] + bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] + bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] + bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] + bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] + d0 = bc4 ^ (bc1<<1 | bc1>>63) + d1 = bc0 ^ (bc2<<1 | bc2>>63) + d2 = bc1 ^ (bc3<<1 | bc3>>63) + d3 = bc2 ^ (bc4<<1 | bc4>>63) + d4 = bc3 ^ (bc0<<1 | bc0>>63) + + bc0 = a[0] ^ d0 + t = a[6] ^ d1 + bc1 = t<<44 | t>>(64-44) + t = a[12] ^ d2 + bc2 = t<<43 | t>>(64-43) + t = a[18] ^ d3 + bc3 = t<<21 | t>>(64-21) + t = a[24] ^ d4 + bc4 = t<<14 | t>>(64-14) + a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i] + a[6] = bc1 ^ (bc3 &^ bc2) + a[12] = bc2 ^ (bc4 &^ bc3) + a[18] = bc3 ^ (bc0 &^ bc4) + a[24] = bc4 ^ (bc1 &^ bc0) + + t = a[10] ^ d0 + bc2 = t<<3 | t>>(64-3) + t = a[16] ^ d1 + bc3 = t<<45 | t>>(64-45) + t = a[22] ^ d2 + bc4 = t<<61 | t>>(64-61) + t = a[3] ^ d3 + bc0 = t<<28 | t>>(64-28) + t = a[9] ^ d4 + bc1 = t<<20 | t>>(64-20) + a[10] = bc0 ^ (bc2 &^ bc1) + a[16] = bc1 ^ (bc3 &^ bc2) + a[22] = bc2 ^ (bc4 &^ bc3) + a[3] = bc3 ^ (bc0 &^ bc4) + a[9] = bc4 ^ (bc1 &^ bc0) + + t = a[20] ^ d0 + bc4 = t<<18 | t>>(64-18) + t = a[1] ^ d1 + bc0 = t<<1 | t>>(64-1) + t = a[7] ^ d2 + bc1 = t<<6 | t>>(64-6) + t = a[13] ^ d3 + bc2 = t<<25 | t>>(64-25) + t = a[19] ^ d4 + bc3 = t<<8 | t>>(64-8) + a[20] = bc0 ^ (bc2 &^ bc1) + a[1] = bc1 ^ (bc3 &^ bc2) + a[7] = bc2 ^ (bc4 &^ bc3) + a[13] = bc3 ^ (bc0 &^ bc4) + a[19] = bc4 ^ (bc1 &^ bc0) + + t = a[5] ^ d0 + bc1 = t<<36 | t>>(64-36) + t = a[11] ^ d1 + bc2 = t<<10 | t>>(64-10) + t = a[17] ^ d2 + bc3 = t<<15 | t>>(64-15) + t = a[23] ^ d3 + bc4 = t<<56 | t>>(64-56) + t = a[4] ^ d4 + bc0 = t<<27 | t>>(64-27) + a[5] = bc0 ^ (bc2 &^ bc1) + a[11] = bc1 ^ (bc3 &^ bc2) + a[17] = bc2 ^ (bc4 &^ bc3) + a[23] = bc3 ^ (bc0 &^ bc4) + a[4] = bc4 ^ (bc1 &^ bc0) + + t = a[15] ^ d0 + bc3 = t<<41 | t>>(64-41) + t = a[21] ^ d1 + bc4 = t<<2 | t>>(64-2) + t = a[2] ^ d2 + bc0 = t<<62 | t>>(64-62) + t = a[8] ^ d3 + bc1 = t<<55 | t>>(64-55) + t = a[14] ^ d4 + bc2 = t<<39 | t>>(64-39) + a[15] = bc0 ^ (bc2 &^ bc1) + a[21] = bc1 ^ (bc3 &^ bc2) + a[2] = bc2 ^ (bc4 &^ bc3) + a[8] = bc3 ^ (bc0 &^ bc4) + a[14] = bc4 ^ (bc1 &^ bc0) + + // Round 2 + bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] + bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] + bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] + bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] + bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] + d0 = bc4 ^ (bc1<<1 | bc1>>63) + d1 = bc0 ^ (bc2<<1 | bc2>>63) + d2 = bc1 ^ (bc3<<1 | bc3>>63) + d3 = bc2 ^ (bc4<<1 | bc4>>63) + d4 = bc3 ^ (bc0<<1 | bc0>>63) + + bc0 = a[0] ^ d0 + t = a[16] ^ d1 + bc1 = t<<44 | t>>(64-44) + t = a[7] ^ d2 + bc2 = t<<43 | t>>(64-43) + t = a[23] ^ d3 + bc3 = t<<21 | t>>(64-21) + t = a[14] ^ d4 + bc4 = t<<14 | t>>(64-14) + a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1] + a[16] = bc1 ^ (bc3 &^ bc2) + a[7] = bc2 ^ (bc4 &^ bc3) + a[23] = bc3 ^ (bc0 &^ bc4) + a[14] = bc4 ^ (bc1 &^ bc0) + + t = a[20] ^ d0 + bc2 = t<<3 | t>>(64-3) + t = a[11] ^ d1 + bc3 = t<<45 | t>>(64-45) + t = a[2] ^ d2 + bc4 = t<<61 | t>>(64-61) + t = a[18] ^ d3 + bc0 = t<<28 | t>>(64-28) + t = a[9] ^ d4 + bc1 = t<<20 | t>>(64-20) + a[20] = bc0 ^ (bc2 &^ bc1) + a[11] = bc1 ^ (bc3 &^ bc2) + a[2] = bc2 ^ (bc4 &^ bc3) + a[18] = bc3 ^ (bc0 &^ bc4) + a[9] = bc4 ^ (bc1 &^ bc0) + + t = a[15] ^ d0 + bc4 = t<<18 | t>>(64-18) + t = a[6] ^ d1 + bc0 = t<<1 | t>>(64-1) + t = a[22] ^ d2 + bc1 = t<<6 | t>>(64-6) + t = a[13] ^ d3 + bc2 = t<<25 | t>>(64-25) + t = a[4] ^ d4 + bc3 = t<<8 | t>>(64-8) + a[15] = bc0 ^ (bc2 &^ bc1) + a[6] = bc1 ^ (bc3 &^ bc2) + a[22] = bc2 ^ (bc4 &^ bc3) + a[13] = bc3 ^ (bc0 &^ bc4) + a[4] = bc4 ^ (bc1 &^ bc0) + + t = a[10] ^ d0 + bc1 = t<<36 | t>>(64-36) + t = a[1] ^ d1 + bc2 = t<<10 | t>>(64-10) + t = a[17] ^ d2 + bc3 = t<<15 | t>>(64-15) + t = a[8] ^ d3 + bc4 = t<<56 | t>>(64-56) + t = a[24] ^ d4 + bc0 = t<<27 | t>>(64-27) + a[10] = bc0 ^ (bc2 &^ bc1) + a[1] = bc1 ^ (bc3 &^ bc2) + a[17] = bc2 ^ (bc4 &^ bc3) + a[8] = bc3 ^ (bc0 &^ bc4) + a[24] = bc4 ^ (bc1 &^ bc0) + + t = a[5] ^ d0 + bc3 = t<<41 | t>>(64-41) + t = a[21] ^ d1 + bc4 = t<<2 | t>>(64-2) + t = a[12] ^ d2 + bc0 = t<<62 | t>>(64-62) + t = a[3] ^ d3 + bc1 = t<<55 | t>>(64-55) + t = a[19] ^ d4 + bc2 = t<<39 | t>>(64-39) + a[5] = bc0 ^ (bc2 &^ bc1) + a[21] = bc1 ^ (bc3 &^ bc2) + a[12] = bc2 ^ (bc4 &^ bc3) + a[3] = bc3 ^ (bc0 &^ bc4) + a[19] = bc4 ^ (bc1 &^ bc0) + + // Round 3 + bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] + bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] + bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] + bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] + bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] + d0 = bc4 ^ (bc1<<1 | bc1>>63) + d1 = bc0 ^ (bc2<<1 | bc2>>63) + d2 = bc1 ^ (bc3<<1 | bc3>>63) + d3 = bc2 ^ (bc4<<1 | bc4>>63) + d4 = bc3 ^ (bc0<<1 | bc0>>63) + + bc0 = a[0] ^ d0 + t = a[11] ^ d1 + bc1 = t<<44 | t>>(64-44) + t = a[22] ^ d2 + bc2 = t<<43 | t>>(64-43) + t = a[8] ^ d3 + bc3 = t<<21 | t>>(64-21) + t = a[19] ^ d4 + bc4 = t<<14 | t>>(64-14) + a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2] + a[11] = bc1 ^ (bc3 &^ bc2) + a[22] = bc2 ^ (bc4 &^ bc3) + a[8] = bc3 ^ (bc0 &^ bc4) + a[19] = bc4 ^ (bc1 &^ bc0) + + t = a[15] ^ d0 + bc2 = t<<3 | t>>(64-3) + t = a[1] ^ d1 + bc3 = t<<45 | t>>(64-45) + t = a[12] ^ d2 + bc4 = t<<61 | t>>(64-61) + t = a[23] ^ d3 + bc0 = t<<28 | t>>(64-28) + t = a[9] ^ d4 + bc1 = t<<20 | t>>(64-20) + a[15] = bc0 ^ (bc2 &^ bc1) + a[1] = bc1 ^ (bc3 &^ bc2) + a[12] = bc2 ^ (bc4 &^ bc3) + a[23] = bc3 ^ (bc0 &^ bc4) + a[9] = bc4 ^ (bc1 &^ bc0) + + t = a[5] ^ d0 + bc4 = t<<18 | t>>(64-18) + t = a[16] ^ d1 + bc0 = t<<1 | t>>(64-1) + t = a[2] ^ d2 + bc1 = t<<6 | t>>(64-6) + t = a[13] ^ d3 + bc2 = t<<25 | t>>(64-25) + t = a[24] ^ d4 + bc3 = t<<8 | t>>(64-8) + a[5] = bc0 ^ (bc2 &^ bc1) + a[16] = bc1 ^ (bc3 &^ bc2) + a[2] = bc2 ^ (bc4 &^ bc3) + a[13] = bc3 ^ (bc0 &^ bc4) + a[24] = bc4 ^ (bc1 &^ bc0) + + t = a[20] ^ d0 + bc1 = t<<36 | t>>(64-36) + t = a[6] ^ d1 + bc2 = t<<10 | t>>(64-10) + t = a[17] ^ d2 + bc3 = t<<15 | t>>(64-15) + t = a[3] ^ d3 + bc4 = t<<56 | t>>(64-56) + t = a[14] ^ d4 + bc0 = t<<27 | t>>(64-27) + a[20] = bc0 ^ (bc2 &^ bc1) + a[6] = bc1 ^ (bc3 &^ bc2) + a[17] = bc2 ^ (bc4 &^ bc3) + a[3] = bc3 ^ (bc0 &^ bc4) + a[14] = bc4 ^ (bc1 &^ bc0) + + t = a[10] ^ d0 + bc3 = t<<41 | t>>(64-41) + t = a[21] ^ d1 + bc4 = t<<2 | t>>(64-2) + t = a[7] ^ d2 + bc0 = t<<62 | t>>(64-62) + t = a[18] ^ d3 + bc1 = t<<55 | t>>(64-55) + t = a[4] ^ d4 + bc2 = t<<39 | t>>(64-39) + a[10] = bc0 ^ (bc2 &^ bc1) + a[21] = bc1 ^ (bc3 &^ bc2) + a[7] = bc2 ^ (bc4 &^ bc3) + a[18] = bc3 ^ (bc0 &^ bc4) + a[4] = bc4 ^ (bc1 &^ bc0) + + // Round 4 + bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] + bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] + bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] + bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] + bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] + d0 = bc4 ^ (bc1<<1 | bc1>>63) + d1 = bc0 ^ (bc2<<1 | bc2>>63) + d2 = bc1 ^ (bc3<<1 | bc3>>63) + d3 = bc2 ^ (bc4<<1 | bc4>>63) + d4 = bc3 ^ (bc0<<1 | bc0>>63) + + bc0 = a[0] ^ d0 + t = a[1] ^ d1 + bc1 = t<<44 | t>>(64-44) + t = a[2] ^ d2 + bc2 = t<<43 | t>>(64-43) + t = a[3] ^ d3 + bc3 = t<<21 | t>>(64-21) + t = a[4] ^ d4 + bc4 = t<<14 | t>>(64-14) + a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3] + a[1] = bc1 ^ (bc3 &^ bc2) + a[2] = bc2 ^ (bc4 &^ bc3) + a[3] = bc3 ^ (bc0 &^ bc4) + a[4] = bc4 ^ (bc1 &^ bc0) + + t = a[5] ^ d0 + bc2 = t<<3 | t>>(64-3) + t = a[6] ^ d1 + bc3 = t<<45 | t>>(64-45) + t = a[7] ^ d2 + bc4 = t<<61 | t>>(64-61) + t = a[8] ^ d3 + bc0 = t<<28 | t>>(64-28) + t = a[9] ^ d4 + bc1 = t<<20 | t>>(64-20) + a[5] = bc0 ^ (bc2 &^ bc1) + a[6] = bc1 ^ (bc3 &^ bc2) + a[7] = bc2 ^ (bc4 &^ bc3) + a[8] = bc3 ^ (bc0 &^ bc4) + a[9] = bc4 ^ (bc1 &^ bc0) + + t = a[10] ^ d0 + bc4 = t<<18 | t>>(64-18) + t = a[11] ^ d1 + bc0 = t<<1 | t>>(64-1) + t = a[12] ^ d2 + bc1 = t<<6 | t>>(64-6) + t = a[13] ^ d3 + bc2 = t<<25 | t>>(64-25) + t = a[14] ^ d4 + bc3 = t<<8 | t>>(64-8) + a[10] = bc0 ^ (bc2 &^ bc1) + a[11] = bc1 ^ (bc3 &^ bc2) + a[12] = bc2 ^ (bc4 &^ bc3) + a[13] = bc3 ^ (bc0 &^ bc4) + a[14] = bc4 ^ (bc1 &^ bc0) + + t = a[15] ^ d0 + bc1 = t<<36 | t>>(64-36) + t = a[16] ^ d1 + bc2 = t<<10 | t>>(64-10) + t = a[17] ^ d2 + bc3 = t<<15 | t>>(64-15) + t = a[18] ^ d3 + bc4 = t<<56 | t>>(64-56) + t = a[19] ^ d4 + bc0 = t<<27 | t>>(64-27) + a[15] = bc0 ^ (bc2 &^ bc1) + a[16] = bc1 ^ (bc3 &^ bc2) + a[17] = bc2 ^ (bc4 &^ bc3) + a[18] = bc3 ^ (bc0 &^ bc4) + a[19] = bc4 ^ (bc1 &^ bc0) + + t = a[20] ^ d0 + bc3 = t<<41 | t>>(64-41) + t = a[21] ^ d1 + bc4 = t<<2 | t>>(64-2) + t = a[22] ^ d2 + bc0 = t<<62 | t>>(64-62) + t = a[23] ^ d3 + bc1 = t<<55 | t>>(64-55) + t = a[24] ^ d4 + bc2 = t<<39 | t>>(64-39) + a[20] = bc0 ^ (bc2 &^ bc1) + a[21] = bc1 ^ (bc3 &^ bc2) + a[22] = bc2 ^ (bc4 &^ bc3) + a[23] = bc3 ^ (bc0 &^ bc4) + a[24] = bc4 ^ (bc1 &^ bc0) + } +} diff --git a/hash/shake/keccakf_amd64.go b/hash/shake/keccakf_amd64.go new file mode 100644 index 0000000..de8aa56 --- /dev/null +++ b/hash/shake/keccakf_amd64.go @@ -0,0 +1,13 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +package shake + +// This function is implemented in keccakf_amd64.s. + +//go:noescape + +func keccakF1600(a *[25]uint64) diff --git a/hash/shake/keccakf_amd64.s b/hash/shake/keccakf_amd64.s new file mode 100644 index 0000000..f88533a --- /dev/null +++ b/hash/shake/keccakf_amd64.s @@ -0,0 +1,390 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +// This code was translated into a form compatible with 6a from the public +// domain sources at https://github.com/gvanas/KeccakCodePackage + +// Offsets in state +#define _ba (0*8) +#define _be (1*8) +#define _bi (2*8) +#define _bo (3*8) +#define _bu (4*8) +#define _ga (5*8) +#define _ge (6*8) +#define _gi (7*8) +#define _go (8*8) +#define _gu (9*8) +#define _ka (10*8) +#define _ke (11*8) +#define _ki (12*8) +#define _ko (13*8) +#define _ku (14*8) +#define _ma (15*8) +#define _me (16*8) +#define _mi (17*8) +#define _mo (18*8) +#define _mu (19*8) +#define _sa (20*8) +#define _se (21*8) +#define _si (22*8) +#define _so (23*8) +#define _su (24*8) + +// Temporary registers +#define rT1 AX + +// Round vars +#define rpState DI +#define rpStack SP + +#define rDa BX +#define rDe CX +#define rDi DX +#define rDo R8 +#define rDu R9 + +#define rBa R10 +#define rBe R11 +#define rBi R12 +#define rBo R13 +#define rBu R14 + +#define rCa SI +#define rCe BP +#define rCi rBi +#define rCo rBo +#define rCu R15 + +#define MOVQ_RBI_RCE MOVQ rBi, rCe +#define XORQ_RT1_RCA XORQ rT1, rCa +#define XORQ_RT1_RCE XORQ rT1, rCe +#define XORQ_RBA_RCU XORQ rBa, rCu +#define XORQ_RBE_RCU XORQ rBe, rCu +#define XORQ_RDU_RCU XORQ rDu, rCu +#define XORQ_RDA_RCA XORQ rDa, rCa +#define XORQ_RDE_RCE XORQ rDe, rCe + +#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \ + /* Prepare round */ \ + MOVQ rCe, rDa; \ + ROLQ $1, rDa; \ + \ + MOVQ _bi(iState), rCi; \ + XORQ _gi(iState), rDi; \ + XORQ rCu, rDa; \ + XORQ _ki(iState), rCi; \ + XORQ _mi(iState), rDi; \ + XORQ rDi, rCi; \ + \ + MOVQ rCi, rDe; \ + ROLQ $1, rDe; \ + \ + MOVQ _bo(iState), rCo; \ + XORQ _go(iState), rDo; \ + XORQ rCa, rDe; \ + XORQ _ko(iState), rCo; \ + XORQ _mo(iState), rDo; \ + XORQ rDo, rCo; \ + \ + MOVQ rCo, rDi; \ + ROLQ $1, rDi; \ + \ + MOVQ rCu, rDo; \ + XORQ rCe, rDi; \ + ROLQ $1, rDo; \ + \ + MOVQ rCa, rDu; \ + XORQ rCi, rDo; \ + ROLQ $1, rDu; \ + \ + /* Result b */ \ + MOVQ _ba(iState), rBa; \ + MOVQ _ge(iState), rBe; \ + XORQ rCo, rDu; \ + MOVQ _ki(iState), rBi; \ + MOVQ _mo(iState), rBo; \ + MOVQ _su(iState), rBu; \ + XORQ rDe, rBe; \ + ROLQ $44, rBe; \ + XORQ rDi, rBi; \ + XORQ rDa, rBa; \ + ROLQ $43, rBi; \ + \ + MOVQ rBe, rCa; \ + MOVQ rc, rT1; \ + ORQ rBi, rCa; \ + XORQ rBa, rT1; \ + XORQ rT1, rCa; \ + MOVQ rCa, _ba(oState); \ + \ + XORQ rDu, rBu; \ + ROLQ $14, rBu; \ + MOVQ rBa, rCu; \ + ANDQ rBe, rCu; \ + XORQ rBu, rCu; \ + MOVQ rCu, _bu(oState); \ + \ + XORQ rDo, rBo; \ + ROLQ $21, rBo; \ + MOVQ rBo, rT1; \ + ANDQ rBu, rT1; \ + XORQ rBi, rT1; \ + MOVQ rT1, _bi(oState); \ + \ + NOTQ rBi; \ + ORQ rBa, rBu; \ + ORQ rBo, rBi; \ + XORQ rBo, rBu; \ + XORQ rBe, rBi; \ + MOVQ rBu, _bo(oState); \ + MOVQ rBi, _be(oState); \ + B_RBI_RCE; \ + \ + /* Result g */ \ + MOVQ _gu(iState), rBe; \ + XORQ rDu, rBe; \ + MOVQ _ka(iState), rBi; \ + ROLQ $20, rBe; \ + XORQ rDa, rBi; \ + ROLQ $3, rBi; \ + MOVQ _bo(iState), rBa; \ + MOVQ rBe, rT1; \ + ORQ rBi, rT1; \ + XORQ rDo, rBa; \ + MOVQ _me(iState), rBo; \ + MOVQ _si(iState), rBu; \ + ROLQ $28, rBa; \ + XORQ rBa, rT1; \ + MOVQ rT1, _ga(oState); \ + G_RT1_RCA; \ + \ + XORQ rDe, rBo; \ + ROLQ $45, rBo; \ + MOVQ rBi, rT1; \ + ANDQ rBo, rT1; \ + XORQ rBe, rT1; \ + MOVQ rT1, _ge(oState); \ + G_RT1_RCE; \ + \ + XORQ rDi, rBu; \ + ROLQ $61, rBu; \ + MOVQ rBu, rT1; \ + ORQ rBa, rT1; \ + XORQ rBo, rT1; \ + MOVQ rT1, _go(oState); \ + \ + ANDQ rBe, rBa; \ + XORQ rBu, rBa; \ + MOVQ rBa, _gu(oState); \ + NOTQ rBu; \ + G_RBA_RCU; \ + \ + ORQ rBu, rBo; \ + XORQ rBi, rBo; \ + MOVQ rBo, _gi(oState); \ + \ + /* Result k */ \ + MOVQ _be(iState), rBa; \ + MOVQ _gi(iState), rBe; \ + MOVQ _ko(iState), rBi; \ + MOVQ _mu(iState), rBo; \ + MOVQ _sa(iState), rBu; \ + XORQ rDi, rBe; \ + ROLQ $6, rBe; \ + XORQ rDo, rBi; \ + ROLQ $25, rBi; \ + MOVQ rBe, rT1; \ + ORQ rBi, rT1; \ + XORQ rDe, rBa; \ + ROLQ $1, rBa; \ + XORQ rBa, rT1; \ + MOVQ rT1, _ka(oState); \ + K_RT1_RCA; \ + \ + XORQ rDu, rBo; \ + ROLQ $8, rBo; \ + MOVQ rBi, rT1; \ + ANDQ rBo, rT1; \ + XORQ rBe, rT1; \ + MOVQ rT1, _ke(oState); \ + K_RT1_RCE; \ + \ + XORQ rDa, rBu; \ + ROLQ $18, rBu; \ + NOTQ rBo; \ + MOVQ rBo, rT1; \ + ANDQ rBu, rT1; \ + XORQ rBi, rT1; \ + MOVQ rT1, _ki(oState); \ + \ + MOVQ rBu, rT1; \ + ORQ rBa, rT1; \ + XORQ rBo, rT1; \ + MOVQ rT1, _ko(oState); \ + \ + ANDQ rBe, rBa; \ + XORQ rBu, rBa; \ + MOVQ rBa, _ku(oState); \ + K_RBA_RCU; \ + \ + /* Result m */ \ + MOVQ _ga(iState), rBe; \ + XORQ rDa, rBe; \ + MOVQ _ke(iState), rBi; \ + ROLQ $36, rBe; \ + XORQ rDe, rBi; \ + MOVQ _bu(iState), rBa; \ + ROLQ $10, rBi; \ + MOVQ rBe, rT1; \ + MOVQ _mi(iState), rBo; \ + ANDQ rBi, rT1; \ + XORQ rDu, rBa; \ + MOVQ _so(iState), rBu; \ + ROLQ $27, rBa; \ + XORQ rBa, rT1; \ + MOVQ rT1, _ma(oState); \ + M_RT1_RCA; \ + \ + XORQ rDi, rBo; \ + ROLQ $15, rBo; \ + MOVQ rBi, rT1; \ + ORQ rBo, rT1; \ + XORQ rBe, rT1; \ + MOVQ rT1, _me(oState); \ + M_RT1_RCE; \ + \ + XORQ rDo, rBu; \ + ROLQ $56, rBu; \ + NOTQ rBo; \ + MOVQ rBo, rT1; \ + ORQ rBu, rT1; \ + XORQ rBi, rT1; \ + MOVQ rT1, _mi(oState); \ + \ + ORQ rBa, rBe; \ + XORQ rBu, rBe; \ + MOVQ rBe, _mu(oState); \ + \ + ANDQ rBa, rBu; \ + XORQ rBo, rBu; \ + MOVQ rBu, _mo(oState); \ + M_RBE_RCU; \ + \ + /* Result s */ \ + MOVQ _bi(iState), rBa; \ + MOVQ _go(iState), rBe; \ + MOVQ _ku(iState), rBi; \ + XORQ rDi, rBa; \ + MOVQ _ma(iState), rBo; \ + ROLQ $62, rBa; \ + XORQ rDo, rBe; \ + MOVQ _se(iState), rBu; \ + ROLQ $55, rBe; \ + \ + XORQ rDu, rBi; \ + MOVQ rBa, rDu; \ + XORQ rDe, rBu; \ + ROLQ $2, rBu; \ + ANDQ rBe, rDu; \ + XORQ rBu, rDu; \ + MOVQ rDu, _su(oState); \ + \ + ROLQ $39, rBi; \ + S_RDU_RCU; \ + NOTQ rBe; \ + XORQ rDa, rBo; \ + MOVQ rBe, rDa; \ + ANDQ rBi, rDa; \ + XORQ rBa, rDa; \ + MOVQ rDa, _sa(oState); \ + S_RDA_RCA; \ + \ + ROLQ $41, rBo; \ + MOVQ rBi, rDe; \ + ORQ rBo, rDe; \ + XORQ rBe, rDe; \ + MOVQ rDe, _se(oState); \ + S_RDE_RCE; \ + \ + MOVQ rBo, rDi; \ + MOVQ rBu, rDo; \ + ANDQ rBu, rDi; \ + ORQ rBa, rDo; \ + XORQ rBi, rDi; \ + XORQ rBo, rDo; \ + MOVQ rDi, _si(oState); \ + MOVQ rDo, _so(oState) \ + +// func keccakF1600(state *[25]uint64) +TEXT ·keccakF1600(SB), 0, $200-8 + MOVQ state+0(FP), rpState + + // Convert the user state into an internal state + NOTQ _be(rpState) + NOTQ _bi(rpState) + NOTQ _go(rpState) + NOTQ _ki(rpState) + NOTQ _mi(rpState) + NOTQ _sa(rpState) + + // Execute the KeccakF permutation + MOVQ _ba(rpState), rCa + MOVQ _be(rpState), rCe + MOVQ _bu(rpState), rCu + + XORQ _ga(rpState), rCa + XORQ _ge(rpState), rCe + XORQ _gu(rpState), rCu + + XORQ _ka(rpState), rCa + XORQ _ke(rpState), rCe + XORQ _ku(rpState), rCu + + XORQ _ma(rpState), rCa + XORQ _me(rpState), rCe + XORQ _mu(rpState), rCu + + XORQ _sa(rpState), rCa + XORQ _se(rpState), rCe + MOVQ _si(rpState), rDi + MOVQ _so(rpState), rDo + XORQ _su(rpState), rCu + + mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) + mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP) + + // Revert the internal state to the user state + NOTQ _be(rpState) + NOTQ _bi(rpState) + NOTQ _go(rpState) + NOTQ _ki(rpState) + NOTQ _mi(rpState) + NOTQ _sa(rpState) + + RET diff --git a/hash/shake/sha3.go b/hash/shake/sha3.go new file mode 100644 index 0000000..2c01bb8 --- /dev/null +++ b/hash/shake/sha3.go @@ -0,0 +1,198 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package shake + +import "hash" + +// spongeDirection indicates the direction bytes are flowing through the sponge. +type spongeDirection int + +const ( + // spongeAbsorbing indicates that the sponge is absorbing input. + spongeAbsorbing spongeDirection = iota + // spongeSqueezing indicates that the sponge is being squeezed. + spongeSqueezing +) + +const ( + // maxRate is the maximum size of the internal buffer. SHAKE-256 + // currently needs the largest buffer. + maxRate = 168 +) + +type state struct { + // Generic sponge components. + a [25]uint64 // main state of the hash + buf []byte // points into storage + rate int // the number of bytes of state to use + + // dsbyte contains the "domain separation" bits and the first bit of + // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the + // SHA-3 and SHAKE functions by appending bitstrings to the message. + // Using a little-endian bit-ordering convention, these are "01" for SHA-3 + // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the + // padding rule from section 5.1 is applied to pad the message to a multiple + // of the rate, which involves adding a "1" bit, zero or more "0" bits, and + // a final "1" bit. We merge the first "1" bit from the padding into dsbyte, + // giving 00000110b (0x06) and 00011111b (0x1f). + // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf + // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and + // Extendable-Output Functions (May 2014)" + dsbyte byte + storage [maxRate]byte + + // Specific to SHA-3 and SHAKE. + outputLen int // the default output size in bytes + state spongeDirection // whether the sponge is absorbing or squeezing +} + +// BlockSize returns the rate of sponge underlying this hash function. +func (d *state) BlockSize() int { return d.rate } + +// Size returns the output size of the hash function in bytes. +func (d *state) Size() int { return d.outputLen } + +// Reset clears the internal state by zeroing the sponge state and +// the byte buffer, and setting Sponge.state to absorbing. +func (d *state) Reset() { + // Zero the permutation's state. + for i := range d.a { + d.a[i] = 0 + } + d.state = spongeAbsorbing + d.buf = d.storage[:0] +} + +func (d *state) clone() *state { + ret := *d + if ret.state == spongeAbsorbing { + ret.buf = ret.storage[:len(ret.buf)] + } else { + ret.buf = ret.storage[d.rate-cap(d.buf) : d.rate] + } + + return &ret +} + +// permute applies the KeccakF-1600 permutation. It handles +// any input-output buffering. +func (d *state) permute() { + switch d.state { + case spongeAbsorbing: + // If we're absorbing, we need to xor the input into the state + // before applying the permutation. + xorIn(d, d.buf) + d.buf = d.storage[:0] + keccakF1600(&d.a) + case spongeSqueezing: + // If we're squeezing, we need to apply the permutatin before + // copying more output. + keccakF1600(&d.a) + d.buf = d.storage[:d.rate] + copyOut(d, d.buf) + } +} + +// pads appends the domain separation bits in dsbyte, applies +// the multi-bitrate 10..1 padding rule, and permutes the state. +func (d *state) padAndPermute(dsbyte byte) { + if d.buf == nil { + d.buf = d.storage[:0] + } + // Pad with this instance's domain-separator bits. We know that there's + // at least one byte of space in d.buf because, if it were full, + // permute would have been called to empty it. dsbyte also contains the + // first one bit for the padding. See the comment in the state struct. + d.buf = append(d.buf, dsbyte) + zerosStart := len(d.buf) + d.buf = d.storage[:d.rate] + for i := zerosStart; i < d.rate; i++ { + d.buf[i] = 0 + } + // This adds the final one bit for the padding. Because of the way that + // bits are numbered from the LSB upwards, the final bit is the MSB of + // the last byte. + d.buf[d.rate-1] ^= 0x80 + // Apply the permutation + d.permute() + d.state = spongeSqueezing + d.buf = d.storage[:d.rate] + copyOut(d, d.buf) +} + +// Write absorbs more data into the hash's state. It produces an error +// if more data is written to the ShakeHash after writing +func (d *state) Write(p []byte) (written int, err error) { + if d.state != spongeAbsorbing { + panic("sha3: write to sponge after read") + } + if d.buf == nil { + d.buf = d.storage[:0] + } + written = len(p) + + for len(p) > 0 { + if len(d.buf) == 0 && len(p) >= d.rate { + // The fast path; absorb a full "rate" bytes of input and apply the permutation. + xorIn(d, p[:d.rate]) + p = p[d.rate:] + keccakF1600(&d.a) + } else { + // The slow path; buffer the input until we can fill the sponge, and then xor it in. + todo := d.rate - len(d.buf) + if todo > len(p) { + todo = len(p) + } + d.buf = append(d.buf, p[:todo]...) + p = p[todo:] + + // If the sponge is full, apply the permutation. + if len(d.buf) == d.rate { + d.permute() + } + } + } + + return +} + +// Read squeezes an arbitrary number of bytes from the sponge. +func (d *state) Read(out []byte) (n int, err error) { + // If we're still absorbing, pad and apply the permutation. + if d.state == spongeAbsorbing { + d.padAndPermute(d.dsbyte) + } + + n = len(out) + + // Now, do the squeezing. + for len(out) > 0 { + n := copy(out, d.buf) + d.buf = d.buf[n:] + out = out[n:] + + // Apply the permutation if we've squeezed the sponge dry. + if len(d.buf) == 0 { + d.permute() + } + } + + return +} + +// Sum applies padding to the hash state and then squeezes out the desired +// number of output bytes. +func (d *state) Sum(in []byte) []byte { + // Make a copy of the original hash so that caller can keep writing + // and summing. + dup := d.clone() + hash := make([]byte, dup.outputLen) + dup.Read(hash) + return append(in, hash...) +} + +// Only use this function if you require compatibility with an existing cryptosystem +// that uses non-standard padding. All other users should use New256 instead. +func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} } diff --git a/hash/shake/sha3_test.go b/hash/shake/sha3_test.go new file mode 100644 index 0000000..9176a20 --- /dev/null +++ b/hash/shake/sha3_test.go @@ -0,0 +1,416 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package shake + +// Tests include all the ShortMsgKATs provided by the Keccak team at +// https://github.com/gvanas/KeccakCodePackage +// +// They only include the zero-bit case of the bitwise testvectors +// published by NIST in the draft of FIPS-202. + +import ( + "bytes" + "compress/flate" + "encoding/hex" + "encoding/json" + "fmt" + "hash" + "os" + "strings" + "testing" +) + +const ( + testString = "brekeccakkeccak koax koax" + katFilename = "testdata/keccakKats.json.deflate" +) + +// testShakes contains functions that return sha3.CShake instances for +// with output-length equal to the KAT length. +var testShakes = map[string]struct { + constructor func(N []byte, S []byte) *CShake + defAlgoName string + defCustomStr string +}{ + // NewCShake without customization produces same result as SHAKE + "SHAKE128": {NewCShake128, "", ""}, + "SHAKE256": {NewCShake256, "", ""}, + "cSHAKE128": {NewCShake128, "CSHAKE128", "CustomStrign"}, + "cSHAKE256": {NewCShake256, "CSHAKE256", "CustomStrign"}, +} + +// decodeHex converts a hex-encoded string into a raw byte string. +func decodeHex(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +// structs used to marshal JSON test-cases. +type KeccakKats struct { + Kats map[string][]struct { + Digest string `json:"digest"` + Length int64 `json:"length"` + Message string `json:"message"` + + // Defined only for cSHAKE + N string `json:"N"` + S string `json:"S"` + } +} + +func testUnalignedAndGeneric(t *testing.T, testf func(impl string)) { + xorInOrig, copyOutOrig := xorIn, copyOut + xorIn, copyOut = xorInGeneric, copyOutGeneric + testf("generic") + if xorImplementationUnaligned != "generic" { + xorIn, copyOut = xorInUnaligned, copyOutUnaligned + testf("unaligned") + } + xorIn, copyOut = xorInOrig, copyOutOrig +} + +// TestKeccakKats tests the SHA-3 and Shake implementations against all the +// ShortMsgKATs from https://github.com/gvanas/KeccakCodePackage +// (The testvectors are stored in keccakKats.json.deflate due to their length.) +func TestKeccakKats(t *testing.T) { + testUnalignedAndGeneric(t, func(impl string) { + // Read the KATs. + deflated, err := os.Open(katFilename) + if err != nil { + t.Errorf("error opening %s: %s", katFilename, err) + } + file := flate.NewReader(deflated) + dec := json.NewDecoder(file) + var katSet KeccakKats + err = dec.Decode(&katSet) + if err != nil { + t.Errorf("error decoding KATs: %s", err) + } + + for algo, v := range testShakes { + for _, kat := range katSet.Kats[algo] { + N, err := hex.DecodeString(kat.N) + if err != nil { + t.Errorf("error decoding KAT: %s", err) + } + + S, err := hex.DecodeString(kat.S) + if err != nil { + t.Errorf("error decoding KAT: %s", err) + } + d := v.constructor(N, S) + in, err := hex.DecodeString(kat.Message) + if err != nil { + t.Errorf("error decoding KAT: %s", err) + } + + d.Write(in[:kat.Length/8]) + out := make([]byte, len(kat.Digest)/2) + d.Read(out) + got := strings.ToUpper(hex.EncodeToString(out)) + if got != kat.Digest { + t.Errorf("function=%s, implementation=%s, length=%d N:%s\n S:%s\nmessage:\n %s \ngot:\n %s\nwanted:\n %s", + algo, impl, kat.Length, kat.N, kat.S, kat.Message, got, kat.Digest) + t.Logf("wanted %+v", kat) + t.FailNow() + } + continue + } + } + }) +} + +// TestKeccak does a basic test of the non-standardized Keccak hash functions. +func TestKeccak(t *testing.T) { + tests := []struct { + fn func() hash.Hash + data []byte + want string + }{ + { + NewLegacyKeccak256, + []byte("abc"), + "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45", + }, + } + + for _, u := range tests { + h := u.fn() + h.Write(u.data) + got := h.Sum(nil) + want := decodeHex(u.want) + if !bytes.Equal(got, want) { + t.Errorf("unexpected hash for size %d: got '%x' want '%s'", h.Size()*8, got, u.want) + } + } +} + +// TestUnalignedWrite tests that writing data in an arbitrary pattern with +// small input buffers. +func TestUnalignedWrite(t *testing.T) { + testUnalignedAndGeneric(t, func(impl string) { + buf := sequentialBytes(0x10000) + + // Same for SHAKE + for alg, df := range testShakes { + want := make([]byte, 16) + got := make([]byte, 16) + d := df.constructor([]byte(df.defAlgoName), []byte(df.defCustomStr)) + + d.Reset() + d.Write(buf) + d.Read(want) + d.Reset() + for i := 0; i < len(buf); { + // Cycle through offsets which make a 137 byte sequence. + // Because 137 is prime this sequence should exercise all corner cases. + offsets := [17]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1} + for _, j := range offsets { + if v := len(buf) - i; v < j { + j = v + } + d.Write(buf[i : i+j]) + i += j + } + } + d.Read(got) + if !bytes.Equal(got, want) { + t.Errorf("Unaligned writes, implementation=%s, alg=%s\ngot %q, want %q", impl, alg, got, want) + } + } + }) +} + +/* TODO: To redesign those tests and unlock + +// TestAppend checks that appending works when reallocation is necessary. +func TestAppend(t *testing.T) { + testUnalignedAndGeneric(t, func(impl string) { + d := NewShake128() + + for capacity := 2; capacity <= 66; capacity += 64 { + // The first time around the loop, Sum will have to reallocate. + // The second time, it will not. + buf := make([]byte, 2, capacity) + d.Reset() + d.Write([]byte{0xcc}) + buf = d.Sum(buf) + expected := "0000DF70ADC49B2E76EEE3A6931B93FA41841C3AF2CDF5B32A18B5478C39" + if got := strings.ToUpper(hex.EncodeToString(buf)); got != expected { + t.Errorf("got %s, want %s", got, expected) + } + } + }) +} + +// TestAppendNoRealloc tests that appending works when no reallocation is necessary. +func TestAppendNoRealloc(t *testing.T) { + testUnalignedAndGeneric(t, func(impl string) { + buf := make([]byte, 1, 200) + d := NewShake128() + d.Write([]byte{0xcc}) + buf = d.Sum(buf) + expected := "00DF70ADC49B2E76EEE3A6931B93FA41841C3AF2CDF5B32A18B5478C39" + if got := strings.ToUpper(hex.EncodeToString(buf)); got != expected { + t.Errorf("%s: got %s, want %s", impl, got, expected) + } + }) +} +*/ + +// TestSqueezing checks that squeezing the full output a single time produces +// the same output as repeatedly squeezing the instance. +func TestSqueezing(t *testing.T) { + testUnalignedAndGeneric(t, func(impl string) { + for algo, v := range testShakes { + d0 := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) + d0.Write([]byte(testString)) + ref := make([]byte, 32) + d0.Read(ref) + + d1 := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) + d1.Write([]byte(testString)) + var multiple []byte + for range ref { + one := make([]byte, 1) + d1.Read(one) + multiple = append(multiple, one...) + } + if !bytes.Equal(ref, multiple) { + t.Errorf("%s (%s): squeezing %d bytes one at a time failed", algo, impl, len(ref)) + } + } + }) +} + +// sequentialBytes produces a buffer of size consecutive bytes 0x00, 0x01, ..., used for testing. +func sequentialBytes(size int) []byte { + result := make([]byte, size) + for i := range result { + result[i] = byte(i) + } + return result +} + +func TestReset(t *testing.T) { + out1 := make([]byte, 32) + out2 := make([]byte, 32) + + for _, v := range testShakes { + // Calculate hash for the first time + c := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) + c.Write(sequentialBytes(0x100)) + c.Read(out1) + + // Calculate hash again + c.Reset() + c.Write(sequentialBytes(0x100)) + c.Read(out2) + + if !bytes.Equal(out1, out2) { + t.Error("\nExpected:\n", out1, "\ngot:\n", out2) + } + } +} + +func TestClone(t *testing.T) { + out1 := make([]byte, 16) + out2 := make([]byte, 16) + in := sequentialBytes(0x100) + + for _, v := range testShakes { + h1 := v.constructor([]byte(v.defAlgoName), []byte(v.defCustomStr)) + h1.Write([]byte{0x01}) + + h2 := h1.Clone() + + h1.Write(in) + h1.Read(out1) + + h2.Write(in) + h2.Read(out2) + + if !bytes.Equal(out1, out2) { + t.Error("\nExpected:\n", hex.EncodeToString(out1), "\ngot:\n", hex.EncodeToString(out2)) + } + } +} + +// BenchmarkPermutationFunction measures the speed of the permutation function +// with no input data. +func BenchmarkPermutationFunction(b *testing.B) { + b.SetBytes(int64(200)) + var lanes [25]uint64 + for i := 0; i < b.N; i++ { + keccakF1600(&lanes) + } +} + +// benchmarkHash tests the speed to hash num buffers of buflen each. +func benchmarkHash(b *testing.B, h hash.Hash, size, num int) { + b.StopTimer() + h.Reset() + data := sequentialBytes(size) + b.SetBytes(int64(size * num)) + b.StartTimer() + + var state []byte + for i := 0; i < b.N; i++ { + for j := 0; j < num; j++ { + h.Write(data) + } + state = h.Sum(state[:0]) + } + b.StopTimer() + h.Reset() +} + +// benchmarkShake is specialized to the Shake instances, which don't +// require a copy on reading output. +func benchmarkShake(b *testing.B, h *CShake, size, num int) { + b.StopTimer() + h.Reset() + data := sequentialBytes(size) + d := make([]byte, 32) + + b.SetBytes(int64(size * num)) + b.StartTimer() + + for i := 0; i < b.N; i++ { + h.Reset() + for j := 0; j < num; j++ { + h.Write(data) + } + h.Read(d) + } +} + +func BenchmarkShake128_MTU(b *testing.B) { benchmarkShake(b, NewShake128(), 1350, 1) } +func BenchmarkShake256_MTU(b *testing.B) { benchmarkShake(b, NewShake256(), 1350, 1) } +func BenchmarkShake256_16x(b *testing.B) { benchmarkShake(b, NewShake256(), 16, 1024) } +func BenchmarkShake256_1MiB(b *testing.B) { benchmarkShake(b, NewShake256(), 1024, 1024) } + +func Example_sum() { + buf := []byte("some data to hash") + // A hash needs to be 64 bytes long to have 256-bit collision resistance. + h := make([]byte, 64) + // Compute a 64-byte hash of buf and put it in h. + ShakeSum256(h, buf) + fmt.Printf("%x\n", h) + // Output: 0f65fe41fc353e52c55667bb9e2b27bfcc8476f2c413e9437d272ee3194a4e3146d05ec04a25d16b8f577c19b82d16b1424c3e022e783d2b4da98de3658d363d +} + +func Example_mac() { + k := []byte("this is a secret key; you should generate a strong random key that's at least 32 bytes long") + buf := []byte("and this is some data to authenticate") + // A MAC with 32 bytes of output has 256-bit security strength -- if you use at least a 32-byte-long key. + h := make([]byte, 32) + d := NewShake256() + // Write the key into the hash. + d.Write(k) + // Now write the data. + d.Write(buf) + // Read 32 bytes of output from the hash into h. + d.Read(h) + fmt.Printf("%x\n", h) + // Output: 78de2974bd2711d5549ffd32b753ef0f5fa80a0db2556db60f0987eb8a9218ff +} + +func ExampleCShake256() { + out := make([]byte, 32) + msg := []byte("The quick brown fox jumps over the lazy dog") + + // Example 1: Simple cshake + c1 := NewCShake256([]byte("NAME"), []byte("Partition1")) + c1.Write(msg) + c1.Read(out) + fmt.Println(hex.EncodeToString(out)) + + // Example 2: Different customization string produces different digest + c1 = NewCShake256([]byte("NAME"), []byte("Partition2")) + c1.Write(msg) + c1.Read(out) + fmt.Println(hex.EncodeToString(out)) + + // Example 3: Different output length produces different digest + out = make([]byte, 64) + c1 = NewCShake256([]byte("NAME"), []byte("Partition1")) + c1.Write(msg) + c1.Read(out) + fmt.Println(hex.EncodeToString(out)) + + // Example 4: Next read produces different result + c1.Read(out) + fmt.Println(hex.EncodeToString(out)) + + // Output: + //a90a4c6ca9af2156eba43dc8398279e6b60dcd56fb21837afe6c308fd4ceb05b + //a8db03e71f3e4da5c4eee9d28333cdd355f51cef3c567e59be5beb4ecdbb28f0 + //a90a4c6ca9af2156eba43dc8398279e6b60dcd56fb21837afe6c308fd4ceb05b9dd98c6ee866ca7dc5a39d53e960f400bcd5a19c8a2d6ec6459f63696543a0d8 + //85e73a72228d08b46515553ca3a29d47df3047e5d84b12d6c2c63e579f4fd1105716b7838e92e981863907f434bfd4443c9e56ea09da998d2f9b47db71988109 +} diff --git a/hash/shake/shake.go b/hash/shake/shake.go new file mode 100644 index 0000000..726d97a --- /dev/null +++ b/hash/shake/shake.go @@ -0,0 +1,146 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package shake + +// This file defines the CShake interface, and provides +// functions for creating SHAKE and cSHAKE instances, as well as utility +// functions for hashing bytes to arbitrary-length output. +// +// +// SHAKE implementation is based on FIPS PUB 202 [1] +// cSHAKE implementations is based on NIST SP 800-185 [2] +// +// [1] https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf +// [2] https://doi.org/10.6028/NIST.SP.800-185 + +import ( + "encoding/binary" +) + +// cSHAKE specific context +type CShake struct { + state // SHA-3 state context and Read/Write operations + + // initBlock is the cSHAKE specific initialization set of bytes. It is initialized + // by newCShake function and stores concatenation of N followed by S, encoded + // by the method specified in 3.3 of [1]. + // It is stored here in order for Reset() to be able to put context into + // initial state. + initBlock []byte +} + +// Consts for configuring initial SHA-3 state +const ( + dsbyteShake = 0x1f + dsbyteCShake = 0x04 + rate128 = 168 + rate256 = 136 +) + +func bytepad(input []byte, w int) []byte { + // leftEncode always returns max 9 bytes + buf := make([]byte, 0, 9+len(input)+w) + buf = append(buf, leftEncode(uint64(w))...) + buf = append(buf, input...) + padlen := w - (len(buf) % w) + return append(buf, make([]byte, padlen)...) +} + +func leftEncode(value uint64) []byte { + var b [9]byte + binary.BigEndian.PutUint64(b[1:], value) + // Trim all but last leading zero bytes + i := byte(1) + for i < 8 && b[i] == 0 { + i++ + } + // Prepend number of encoded bytes + b[i-1] = 9 - i + return b[i-1:] +} + +func newCShake(N, S []byte, rate int, dsbyte byte) *CShake { + + // leftEncode returns max 9 bytes + initBlock := make([]byte, 0, 9*2+len(N)+len(S)) + initBlock = append(initBlock, leftEncode(uint64(len(N)*8))...) + initBlock = append(initBlock, N...) + initBlock = append(initBlock, leftEncode(uint64(len(S)*8))...) + initBlock = append(initBlock, S...) + + c := CShake{ + state: state{rate: rate, dsbyte: dsbyte}, + initBlock: bytepad(initBlock, rate), + } + c.Write(c.initBlock) + return &c +} + +// Reset resets the hash to initial state. +func (c *CShake) Reset() { + c.state.Reset() + c.Write(c.initBlock) +} + +// Clone returns copy of a cSHAKE context within its current state. +func (c *CShake) Clone() CShake { + b := make([]byte, len(c.initBlock)) + copy(b, c.initBlock) + return CShake{state: *c.clone(), initBlock: b} +} + +// NewShake128 creates a new SHAKE128 variable-output-length CShake. +// Its generic security strength is 128 bits against all attacks if at +// least 32 bytes of its output are used. +func NewShake128() *CShake { + return &CShake{state{rate: rate128, dsbyte: dsbyteShake}, nil} +} + +// NewShake256 creates a new SHAKE256 variable-output-length CShake. +// Its generic security strength is 256 bits against all attacks if +// at least 64 bytes of its output are used. +func NewShake256() *CShake { + return &CShake{state{rate: rate256, dsbyte: dsbyteShake}, nil} +} + +// NewCShake128 creates a new instance of cSHAKE128 variable-output-length CShake, +// a customizable variant of SHAKE128. +// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is +// desired. S is a customization byte string used for domain separation - two cSHAKE +// computations on same input with different S yield unrelated outputs. +// When N and S are both empty, this is equivalent to NewShake128. +func NewCShake128(N, S []byte) *CShake { + if len(N) == 0 && len(S) == 0 { + return NewShake128() + } + return newCShake(N, S, rate128, dsbyteCShake) +} + +// NewCShake256 creates a new instance of cSHAKE256 variable-output-length CShake, +// a customizable variant of SHAKE256. +// N is used to define functions based on cSHAKE, it can be empty when plain cSHAKE is +// desired. S is a customization byte string used for domain separation - two cSHAKE +// computations on same input with different S yield unrelated outputs. +// When N and S are both empty, this is equivalent to NewShake256. +func NewCShake256(N, S []byte) *CShake { + if len(N) == 0 && len(S) == 0 { + return NewShake256() + } + return newCShake(N, S, rate256, dsbyteCShake) +} + +// ShakeSum128 writes an arbitrary-length digest of data into hash. +func ShakeSum128(hash, data []byte) { + h := NewShake128() + h.Write(data) + h.Read(hash) +} + +// ShakeSum256 writes an arbitrary-length digest of data into hash. +func ShakeSum256(hash, data []byte) { + h := NewShake256() + h.Write(data) + h.Read(hash) +} diff --git a/hash/shake/testdata/keccakKats.json.deflate b/hash/shake/testdata/keccakKats.json.deflate new file mode 100644 index 0000000..62e85ae Binary files /dev/null and b/hash/shake/testdata/keccakKats.json.deflate differ diff --git a/hash/shake/xor.go b/hash/shake/xor.go new file mode 100644 index 0000000..b6b4370 --- /dev/null +++ b/hash/shake/xor.go @@ -0,0 +1,16 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!386,!ppc64le appengine + +package shake + +var ( + xorIn = xorInGeneric + copyOut = copyOutGeneric + xorInUnaligned = xorInGeneric + copyOutUnaligned = copyOutGeneric +) + +const xorImplementationUnaligned = "generic" diff --git a/hash/shake/xor_generic.go b/hash/shake/xor_generic.go new file mode 100644 index 0000000..d198554 --- /dev/null +++ b/hash/shake/xor_generic.go @@ -0,0 +1,28 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package shake + +import "encoding/binary" + +// xorInGeneric xors the bytes in buf into the state; it +// makes no non-portable assumptions about memory layout +// or alignment. +func xorInGeneric(d *state, buf []byte) { + n := len(buf) / 8 + + for i := 0; i < n; i++ { + a := binary.LittleEndian.Uint64(buf) + d.a[i] ^= a + buf = buf[8:] + } +} + +// copyOutGeneric copies ulint64s to a byte buffer. +func copyOutGeneric(d *state, b []byte) { + for i := 0; len(b) >= 8; i++ { + binary.LittleEndian.PutUint64(b, d.a[i]) + b = b[8:] + } +} diff --git a/hash/shake/xor_unaligned.go b/hash/shake/xor_unaligned.go new file mode 100644 index 0000000..d5dd014 --- /dev/null +++ b/hash/shake/xor_unaligned.go @@ -0,0 +1,58 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 386 ppc64le +// +build !appengine + +package shake + +import "unsafe" + +func xorInUnaligned(d *state, buf []byte) { + bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0])) + n := len(buf) + if n >= 72 { + d.a[0] ^= bw[0] + d.a[1] ^= bw[1] + d.a[2] ^= bw[2] + d.a[3] ^= bw[3] + d.a[4] ^= bw[4] + d.a[5] ^= bw[5] + d.a[6] ^= bw[6] + d.a[7] ^= bw[7] + d.a[8] ^= bw[8] + } + if n >= 104 { + d.a[9] ^= bw[9] + d.a[10] ^= bw[10] + d.a[11] ^= bw[11] + d.a[12] ^= bw[12] + } + if n >= 136 { + d.a[13] ^= bw[13] + d.a[14] ^= bw[14] + d.a[15] ^= bw[15] + d.a[16] ^= bw[16] + } + if n >= 144 { + d.a[17] ^= bw[17] + } + if n >= 168 { + d.a[18] ^= bw[18] + d.a[19] ^= bw[19] + d.a[20] ^= bw[20] + } +} + +func copyOutUnaligned(d *state, buf []byte) { + ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0])) + copy(buf, ab[:]) +} + +var ( + xorIn = xorInUnaligned + copyOut = copyOutUnaligned +) + +const xorImplementationUnaligned = "unaligned"